Diffstat (limited to 'contrib/llvm/lib/CodeGen')
149 files changed, 107890 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 0000000..5a634d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,980 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+         cl::desc("Debug control for aggressive anti-dep breaker"),
+         cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+         cl::desc("Debug control for aggressive anti-dep breaker"),
+         cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+                                               MachineBasicBlock *BB) :
+  NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+  GroupNodeIndices(TargetRegs, 0),
+  KillIndices(TargetRegs, 0),
+  DefIndices(TargetRegs, 0)
+{
+  const unsigned BBSize = BB->size();
+  for (unsigned i = 0; i < NumTargetRegs; ++i) {
+    // Initialize all registers to be in their own group. Initially we
+    // assign the register to the same-indexed GroupNode.
+    GroupNodeIndices[i] = i;
+    // Initialize the indices to indicate that no registers are live.
+    KillIndices[i] = ~0u;
+    DefIndices[i] = BBSize;
+  }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+  unsigned Node = GroupNodeIndices[Reg];
+  while (GroupNodes[Node] != Node)
+    Node = GroupNodes[Node];
+
+  return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+  unsigned Group,
+  std::vector<unsigned> &Regs,
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+  for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+    if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+      Regs.push_back(Reg);
+  }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+  assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+  assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+  // find group for each register
+  unsigned Group1 = GetGroup(Reg1);
+  unsigned Group2 = GetGroup(Reg2);
+
+  // if either group is 0, then that must become the parent
+  unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+  unsigned Other = (Parent == Group1) ?
Group2 : Group1; + GroupNodes.at(Other) = Parent; + return Parent; +} + +unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) +{ + // Create a new GroupNode for Reg. Reg's existing GroupNode must + // stay as is because there could be other GroupNodes referring to + // it. + unsigned idx = GroupNodes.size(); + GroupNodes.push_back(idx); + GroupNodeIndices[Reg] = idx; + return idx; +} + +bool AggressiveAntiDepState::IsLive(unsigned Reg) +{ + // KillIndex must be defined and DefIndex not defined for a register + // to be live. + return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); +} + + + +AggressiveAntiDepBreaker:: +AggressiveAntiDepBreaker(MachineFunction& MFi, + TargetSubtarget::RegClassVector& CriticalPathRCs) : + AntiDepBreaker(), MF(MFi), + MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), + TRI(MF.getTarget().getRegisterInfo()), + AllocatableSet(TRI->getAllocatableSet(MF)), + State(NULL) { + /* Collect a bitset of all registers that are only broken if they + are on the critical path. */ + for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { + BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]); + if (CriticalPathSet.none()) + CriticalPathSet = CPSet; + else + CriticalPathSet |= CPSet; + } + + DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + r = CriticalPathSet.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << '\n'); +} + +AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { + delete State; +} + +void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + assert(State == NULL); + State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); + + bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + + // Determine the live-out physregs for this block. + if (IsReturnBlock) { + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. 
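+  // For example (illustrative, hypothetical target): if R4-R11 are
+  // callee-saved but the prolog only spills R4-R7, the unspilled R8-R11
+  // remain "pristine" and are treated as live-out below.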
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  BitVector Pristine = MFI->getPristineRegs(BB);
+  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+    unsigned Reg = *I;
+    if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+    State->UnionGroups(Reg, 0);
+    KillIndices[Reg] = BB->size();
+    DefIndices[Reg] = ~0u;
+    // Repeat, for all aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      State->UnionGroups(AliasReg, 0);
+      KillIndices[AliasReg] = BB->size();
+      DefIndices[AliasReg] = ~0u;
+    }
+  }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+  delete State;
+  State = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+                                       unsigned InsertPosIndex) {
+  assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+  std::set<unsigned> PassthruRegs;
+  GetPassthruRegs(MI, PassthruRegs);
+  PrescanInstruction(MI, Count, PassthruRegs);
+  ScanInstruction(MI, Count);
+
+  DEBUG(dbgs() << "Observe: ");
+  DEBUG(MI->dump());
+  DEBUG(dbgs() << "\tRegs:");
+
+  std::vector<unsigned> &DefIndices = State->GetDefIndices();
+  for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+    // If Reg is currently live, then mark that it can't be renamed as
+    // we don't know the extent of its live-range anymore (now that it
+    // has been scheduled). If it is not live but was defined in the
+    // previous schedule region, then set its def index to the most
+    // conservative location (i.e. the beginning of the previous
+    // schedule region).
+    if (State->IsLive(Reg)) {
+      DEBUG(if (State->GetGroup(Reg) != 0)
+              dbgs() << " " << TRI->getName(Reg) << "=g" <<
+                State->GetGroup(Reg) << "->g0(region live-out)");
+      State->UnionGroups(Reg, 0);
+    } else if ((DefIndices[Reg] < InsertPosIndex)
+               && (DefIndices[Reg] >= Count)) {
+      DefIndices[Reg] = Count;
+    }
+  }
+  DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+                                                MachineOperand& MO)
+{
+  if (!MO.isReg() || !MO.isImplicit())
+    return false;
+
+  unsigned Reg = MO.getReg();
+  if (Reg == 0)
+    return false;
+
+  MachineOperand *Op = NULL;
+  if (MO.isDef())
+    Op = MI->findRegisterUseOperand(Reg, true);
+  else
+    Op = MI->findRegisterDefOperand(Reg);
+
+  return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+                                               std::set<unsigned>& PassthruRegs) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+        IsImplicitDefUse(MI, MO)) {
+      const unsigned Reg = MO.getReg();
+      PassthruRegs.insert(Reg);
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        PassthruRegs.insert(*Subreg);
+      }
+    }
+  }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+  SmallSet<unsigned, 4> RegSet;
+  for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+       P != PE; ++P) {
+    if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+      unsigned Reg = P->getReg();
+      if (RegSet.count(Reg) == 0) {
+        Edges.push_back(&*P);
+        RegSet.insert(Reg);
+      }
+    }
+  }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
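+/// For example (illustrative): given predecessors P1 with getDepth() 4 and
+/// edge latency 2 (total 6) and P2 with getDepth() 5 and latency 2 (total 7),
+/// P2 is chosen; on a latency tie, an anti-dependence edge is preferred.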
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+  const SDep *Next = 0;
+  unsigned NextDepth = 0;
+  // Find the predecessor edge with the greatest depth.
+  if (SU != 0) {
+    for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+         P != PE; ++P) {
+      const SUnit *PredSU = P->getSUnit();
+      unsigned PredLatency = P->getLatency();
+      unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+      // In the case of a latency tie, prefer an anti-dependency edge over
+      // other types of edges.
+      if (NextDepth < PredTotalLatency ||
+          (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+        NextDepth = PredTotalLatency;
+        Next = &*P;
+      }
+    }
+  }
+
+  return (Next) ? Next->getSUnit() : 0;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+                                             const char *tag,
+                                             const char *header,
+                                             const char *footer) {
+  std::vector<unsigned> &KillIndices = State->GetKillIndices();
+  std::vector<unsigned> &DefIndices = State->GetDefIndices();
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+    RegRefs = State->GetRegRefs();
+
+  if (!State->IsLive(Reg)) {
+    KillIndices[Reg] = KillIdx;
+    DefIndices[Reg] = ~0u;
+    RegRefs.erase(Reg);
+    State->LeaveGroup(Reg);
+    DEBUG(if (header != NULL) {
+        dbgs() << header << TRI->getName(Reg); header = NULL; });
+    DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+  }
+  // Repeat for subregisters.
+  for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+       *Subreg; ++Subreg) {
+    unsigned SubregReg = *Subreg;
+    if (!State->IsLive(SubregReg)) {
+      KillIndices[SubregReg] = KillIdx;
+      DefIndices[SubregReg] = ~0u;
+      RegRefs.erase(SubregReg);
+      State->LeaveGroup(SubregReg);
+      DEBUG(if (header != NULL) {
+          dbgs() << header << TRI->getName(Reg); header = NULL; });
+      DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+            State->GetGroup(SubregReg) << tag);
+    }
+  }
+
+  DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+                                                  unsigned Count,
+                                                  std::set<unsigned>& PassthruRegs) {
+  std::vector<unsigned> &DefIndices = State->GetDefIndices();
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+    RegRefs = State->GetRegRefs();
+
+  // Handle dead defs by simulating a last-use of the register just
+  // after the def. A dead def can occur because the def is truly
+  // dead, or because only a subregister is live at the def. If we
+  // don't do this the dead def will be incorrectly merged into the
+  // previous def.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+  }
+
+  DEBUG(dbgs() << "\tDef Groups:");
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+    // If MI's defs have a special allocation requirement, don't allow
+    // any def registers to be changed. Also assume all registers
+    // defined in a call must not be changed (ABI).
+    if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+        TII->isPredicated(MI)) {
+      DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+      State->UnionGroups(Reg, 0);
+    }
+
+    // Any aliases that are live at this point are completely or
+    // partially defined here, so group those aliases with Reg.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      if (State->IsLive(AliasReg)) {
+        State->UnionGroups(Reg, AliasReg);
+        DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+              TRI->getName(AliasReg) << ")");
+      }
+    }
+
+    // Note register reference...
+    const TargetRegisterClass *RC = NULL;
+    if (i < MI->getDesc().getNumOperands())
+      RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+    AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+    RegRefs.insert(std::make_pair(Reg, RR));
+  }
+
+  DEBUG(dbgs() << '\n');
+
+  // Scan the register defs for this instruction and update
+  // live-ranges.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    // Ignore KILLs and passthru registers for liveness...
+    if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+      continue;
+
+    // Update def for Reg and aliases.
+    DefIndices[Reg] = Count;
+    for (const unsigned *Alias = TRI->getAliasSet(Reg);
+         *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      DefIndices[AliasReg] = Count;
+    }
+  }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+                                               unsigned Count) {
+  DEBUG(dbgs() << "\tUse Groups:");
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+    RegRefs = State->GetRegRefs();
+
+  // If MI's uses have special allocation requirement, don't allow
+  // any use registers to be changed. Also assume all registers
+  // used in a call must not be changed (ABI).
+  // FIXME: The issue with predicated instructions is more complex. We are
+  // being conservative here because the kill markers cannot be trusted after
+  // if-conversion:
+  // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // ...
+  // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+  // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+  // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  //
+  // The first R6 kill is not really a kill since it's killed by a predicated
+  // instruction which may not be executed. The second R6 def may or may not
+  // re-define R6 so it's not safe to change it since the last R6 use cannot be
+  // changed.
+  bool Special = MI->getDesc().isCall() ||
+    MI->getDesc().hasExtraSrcRegAllocReq() ||
+    TII->isPredicated(MI);
+
+  // Scan the register uses for this instruction and update
+  // live-ranges, groups and RegRefs.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+          State->GetGroup(Reg));
+
+    // It wasn't previously live but now it is, this is a kill. Forget
+    // the previous live-range information and start a new live-range
+    // for the register.
+    HandleLastUse(Reg, Count, "(last-use)");
+
+    if (Special) {
+      DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+      State->UnionGroups(Reg, 0);
+    }
+
+    // Note register reference...
+ const TargetRegisterClass *RC = NULL; + if (i < MI->getDesc().getNumOperands()) + RC = MI->getDesc().OpInfo[i].getRegClass(TRI); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Form a group of all defs and uses of a KILL instruction to ensure + // that all registers are renamed as a group. + if (MI->isKill()) { + DEBUG(dbgs() << "\tKill Group:"); + + unsigned FirstReg = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (FirstReg != 0) { + DEBUG(dbgs() << "=" << TRI->getName(Reg)); + State->UnionGroups(FirstReg, Reg); + } else { + DEBUG(dbgs() << " " << TRI->getName(Reg)); + FirstReg = Reg; + } + } + + DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); + } +} + +BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { + BitVector BV(TRI->getNumRegs(), false); + bool first = true; + + // Check all references that need rewriting for Reg. For each, use + // the corresponding register class to narrow the set of registers + // that are appropriate for renaming. + std::pair<std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator, + std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator> + Range = State->GetRegRefs().equal_range(Reg); + for (std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, + QE = Range.second; Q != QE; ++Q) { + const TargetRegisterClass *RC = Q->second.RC; + if (RC == NULL) continue; + + BitVector RCBV = TRI->getAllocatableSet(MF, RC); + if (first) { + BV |= RCBV; + first = false; + } else { + BV &= RCBV; + } + + DEBUG(dbgs() << " " << RC->getName()); + } + + return BV; +} + +bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( + unsigned AntiDepGroupIndex, + RenameOrderType& RenameOrder, + std::map<unsigned, unsigned> &RenameMap) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // Collect all referenced registers in the same group as + // AntiDepReg. These all need to be renamed together if we are to + // break the anti-dependence. + std::vector<unsigned> Regs; + State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); + assert(Regs.size() > 0 && "Empty register group!"); + if (Regs.size() == 0) + return false; + + // Find the "superest" register in the group. At the same time, + // collect the BitVector of registers that can be used to rename + // each register. 
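+  // For example (illustrative, x86): for a group containing EAX and its
+  // subregister AX, SuperReg becomes EAX, and each register's rename
+  // candidates are the allocatable registers of its referenced classes.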
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); + std::map<unsigned, BitVector> RenameRegisterMap; + unsigned SuperReg = 0; + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg)) + SuperReg = Reg; + + // If Reg has any references, then collect possible rename regs + if (RegRefs.count(Reg) > 0) { + DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); + + BitVector BV = GetRenameRegisters(Reg); + RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV)); + + DEBUG(dbgs() << " ::"); + DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << "\n"); + } + } + + // All group registers should be a subreg of SuperReg. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if (Reg == SuperReg) continue; + bool IsSub = TRI->isSubRegister(SuperReg, Reg); + assert(IsSub && "Expecting group subregister"); + if (!IsSub) + return false; + } + +#ifndef NDEBUG + // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int renamecnt = 0; + if (renamecnt++ % DebugDiv != DebugMod) + return false; + + dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << + " for debug ***\n"; + } +#endif + + // Check each possible rename register for SuperReg in round-robin + // order. If that register is available, and the corresponding + // registers are available for the other group subregisters, then we + // can use those registers to rename. + + // FIXME: Using getMinimalPhysRegClass is very conservative. We should + // check every use of the register and find the largest register class + // that can be used in all of them. + const TargetRegisterClass *SuperRC = + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); + + const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); + const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); + if (RB == RE) { + DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); + return false; + } + + DEBUG(dbgs() << "\tFind Registers:"); + + if (RenameOrder.count(SuperRC) == 0) + RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE)); + + const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC]; + const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR); + TargetRegisterClass::iterator R = OrigR; + do { + if (R == RB) R = RE; + --R; + const unsigned NewSuperReg = *R; + // Don't consider non-allocatable registers + if (!AllocatableSet.test(NewSuperReg)) continue; + // Don't replace a register with itself. + if (NewSuperReg == SuperReg) continue; + + DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); + RenameMap.clear(); + + // For each referenced group register (which must be a SuperReg or + // a subregister of SuperReg), find the corresponding subregister + // of NewSuperReg and make sure it is free to be renamed. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + unsigned NewReg = 0; + if (Reg == SuperReg) { + NewReg = NewSuperReg; + } else { + unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg); + if (NewSubRegIdx != 0) + NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); + } + + DEBUG(dbgs() << " " << TRI->getName(NewReg)); + + // Check if Reg can be renamed to NewReg. 
+      BitVector BV = RenameRegisterMap[Reg];
+      if (!BV.test(NewReg)) {
+        DEBUG(dbgs() << "(no rename)");
+        goto next_super_reg;
+      }
+
+      // If NewReg is dead and NewReg's most recent def is not before
+      // Reg's kill, it's safe to replace Reg with NewReg. We
+      // must also check all aliases of NewReg, because we can't define a
+      // register when any sub or super is already live.
+      if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+        DEBUG(dbgs() << "(live)");
+        goto next_super_reg;
+      } else {
+        bool found = false;
+        for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+             *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          if (State->IsLive(AliasReg) ||
+              (KillIndices[Reg] > DefIndices[AliasReg])) {
+            DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+            found = true;
+            break;
+          }
+        }
+        if (found)
+          goto next_super_reg;
+      }
+
+      // Record that 'Reg' can be renamed to 'NewReg'.
+      RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+    }
+
+    // If we fall-out here, then every register in the group can be
+    // renamed, as recorded in RenameMap.
+    RenameOrder.erase(SuperRC);
+    RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+    DEBUG(dbgs() << "]\n");
+    return true;
+
+  next_super_reg:
+    DEBUG(dbgs() << ']');
+  } while (R != EndR);
+
+  DEBUG(dbgs() << '\n');
+
+  // No registers are free and available!
+  return false;
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+                              const std::vector<SUnit>& SUnits,
+                              MachineBasicBlock::iterator Begin,
+                              MachineBasicBlock::iterator End,
+                              unsigned InsertPosIndex) {
+  std::vector<unsigned> &KillIndices = State->GetKillIndices();
+  std::vector<unsigned> &DefIndices = State->GetDefIndices();
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+    RegRefs = State->GetRegRefs();
+
+  // The code below assumes that there is at least one instruction,
+  // so just duck out immediately if the block is empty.
+  if (SUnits.empty()) return 0;
+
+  // For each regclass the next register to use for renaming.
+  RenameOrderType RenameOrder;
+
+  // ...need a map from MI to SUnit.
+  std::map<MachineInstr *, const SUnit *> MISUnitMap;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    const SUnit *SU = &SUnits[i];
+    MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+                                                               SU));
+  }
+
+  // Track progress along the critical path through the SUnit graph as
+  // we walk the instructions. This is needed for regclasses that only
+  // break critical-path anti-dependencies.
+  const SUnit *CriticalPathSU = 0;
+  MachineInstr *CriticalPathMI = 0;
+  if (CriticalPathSet.any()) {
+    for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+      const SUnit *SU = &SUnits[i];
+      if (!CriticalPathSU ||
+          ((SU->getDepth() + SU->Latency) >
+           (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+        CriticalPathSU = SU;
+      }
+    }
+
+    CriticalPathMI = CriticalPathSU->getInstr();
+  }
+
+#ifndef NDEBUG
+  DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+  DEBUG(dbgs() << "Available regs:");
+  for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+    if (!State->IsLive(Reg))
+      DEBUG(dbgs() << " " << TRI->getName(Reg));
+  }
+  DEBUG(dbgs() << '\n');
+#endif
+
+  // Attempt to break anti-dependence edges. Walk the instructions
+  // from the bottom up, tracking information about liveness as we go
+  // to help determine which registers are available.
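+  // (Illustrative note: Count starts at InsertPosIndex - 1 for the bottom
+  // instruction and decreases as the loop walks toward Begin, matching the
+  // bottom-up KillIndices/DefIndices numbering.)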
+ unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + + DEBUG(dbgs() << "Anti: "); + DEBUG(MI->dump()); + + std::set<unsigned> PassthruRegs; + GetPassthruRegs(MI, PassthruRegs); + + // Process the defs in MI... + PrescanInstruction(MI, Count, PassthruRegs); + + // The dependence edges that represent anti- and output- + // dependencies that are candidates for breaking. + std::vector<const SDep *> Edges; + const SUnit *PathSU = MISUnitMap[MI]; + AntiDepEdges(PathSU, Edges); + + // If MI is not on the critical path, then we don't rename + // registers in the CriticalPathSet. + BitVector *ExcludeRegs = NULL; + if (MI == CriticalPathMI) { + CriticalPathSU = CriticalPathStep(CriticalPathSU); + CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; + } else { + ExcludeRegs = &CriticalPathSet; + } + + // Ignore KILL instructions (they form a group in ScanInstruction + // but don't cause any anti-dependence breaking themselves) + if (!MI->isKill()) { + // Attempt to break each anti-dependency... + for (unsigned i = 0, e = Edges.size(); i != e; ++i) { + const SDep *Edge = Edges[i]; + SUnit *NextSU = Edge->getSUnit(); + + if ((Edge->getKind() != SDep::Anti) && + (Edge->getKind() != SDep::Output)) continue; + + unsigned AntiDepReg = Edge->getReg(); + DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + + if (!AllocatableSet.test(AntiDepReg)) { + // Don't break anti-dependencies on non-allocatable registers. + DEBUG(dbgs() << " (non-allocatable)\n"); + continue; + } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) { + // Don't break anti-dependencies for critical path registers + // if not on the critical path + DEBUG(dbgs() << " (not critical-path)\n"); + continue; + } else if (PassthruRegs.count(AntiDepReg) != 0) { + // If the anti-dep register liveness "passes-thru", then + // don't try to change it. It will be changed along with + // the use if required to break an earlier antidep. + DEBUG(dbgs() << " (passthru)\n"); + continue; + } else { + // No anti-dep breaking for implicit deps + MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); + assert(AntiDepOp != NULL && + "Can't find index for defined register operand"); + if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) { + DEBUG(dbgs() << " (implicit)\n"); + continue; + } + + // If the SUnit has other dependencies on the SUnit that + // it anti-depends on, don't bother breaking the + // anti-dependency since those edges would prevent such + // units from being scheduled past each other + // regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if (P->getSUnit() == NextSU ? 
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && + (P->getKind() != SDep::Output)) { + DEBUG(dbgs() << " (real dependency)\n"); + AntiDepReg = 0; + break; + } else if ((P->getSUnit() != NextSU) && + (P->getKind() == SDep::Data) && + (P->getReg() == AntiDepReg)) { + DEBUG(dbgs() << " (other dependency)\n"); + AntiDepReg = 0; + break; + } + } + + if (AntiDepReg == 0) continue; + } + + assert(AntiDepReg != 0); + if (AntiDepReg == 0) continue; + + // Determine AntiDepReg's register group. + const unsigned GroupIndex = State->GetGroup(AntiDepReg); + if (GroupIndex == 0) { + DEBUG(dbgs() << " (zero group)\n"); + continue; + } + + DEBUG(dbgs() << '\n'); + + // Look for a suitable register to use to break the anti-dependence. + std::map<unsigned, unsigned> RenameMap; + if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { + DEBUG(dbgs() << "\tBreaking anti-dependence edge on " + << TRI->getName(AntiDepReg) << ":"); + + // Handle each group register... + for (std::map<unsigned, unsigned>::iterator + S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) { + unsigned CurrReg = S->first; + unsigned NewReg = S->second; + + DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << + TRI->getName(NewReg) << "(" << + RegRefs.count(CurrReg) << " refs)"); + + // Update the references to the old register CurrReg to + // refer to the new register NewReg. + std::pair<std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator, + std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator> + Range = RegRefs.equal_range(CurrReg); + for (std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) { + Q->second.Operand->setReg(NewReg); + // If the SU for the instruction being updated has debug + // information related to the anti-dependency register, make + // sure to update that as well. + const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } + } + + // We just went back in time and modified history; the + // liveness information for CurrReg is now inconsistent. Set + // the state as if it were dead. 
+              State->UnionGroups(NewReg, 0);
+              RegRefs.erase(NewReg);
+              DefIndices[NewReg] = DefIndices[CurrReg];
+              KillIndices[NewReg] = KillIndices[CurrReg];
+
+              State->UnionGroups(CurrReg, 0);
+              RegRefs.erase(CurrReg);
+              DefIndices[CurrReg] = KillIndices[CurrReg];
+              KillIndices[CurrReg] = ~0u;
+              assert(((KillIndices[CurrReg] == ~0u) !=
+                      (DefIndices[CurrReg] == ~0u)) &&
+                     "Kill and Def maps aren't consistent for AntiDepReg!");
+            }
+
+            ++Broken;
+            DEBUG(dbgs() << '\n');
+          }
+        }
+      }
+    }
+
+    ScanInstruction(MI, Count);
+  }
+
+  return Broken;
+}
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 0000000..9d715cc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,184 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+  /// Class AggressiveAntiDepState
+  /// Contains all the state necessary for anti-dep breaking.
+  class AggressiveAntiDepState {
+  public:
+    /// RegisterReference - Information about a register reference
+    /// within a live range
+    typedef struct {
+      /// Operand - The register's operand
+      MachineOperand *Operand;
+      /// RC - The register class
+      const TargetRegisterClass *RC;
+    } RegisterReference;
+
+  private:
+    /// NumTargetRegs - Number of non-virtual target registers
+    /// (i.e. TRI->getNumRegs()).
+    const unsigned NumTargetRegs;
+
+    /// GroupNodes - Implements a disjoint-union data structure to
+    /// form register groups. A node is represented by an index into
+    /// the vector. A node can "point to" itself to indicate that it
+    /// is the parent of a group, or point to another node to indicate
+    /// that it is a member of the same group as that node.
+    std::vector<unsigned> GroupNodes;
+
+    /// GroupNodeIndices - For each register, the index of the GroupNode
+    /// currently representing the group that the register belongs to.
+    /// Register 0 is always represented by the 0 group, a group
+    /// composed of registers that are not eligible for anti-dependency
+    /// breaking.
+    std::vector<unsigned> GroupNodeIndices;
+
+    /// RegRefs - Map registers to all their references within a live range.
+    std::multimap<unsigned, RegisterReference> RegRefs;
+
+    /// KillIndices - The index of the most recent kill (proceeding
+    /// bottom-up), or ~0u if the register is not live.
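+    /// For example (illustrative): after HandleLastUse records a last use of
+    /// a register R at index 5, KillIndices[R] == 5 and DefIndices[R] == ~0u,
+    /// so IsLive(R) is true; once R's def is scanned, DefIndices[R] is set
+    /// and R is no longer considered live.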
+    std::vector<unsigned> KillIndices;
+
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom-up), or ~0u if the register is live.
+    std::vector<unsigned> DefIndices;
+
+  public:
+    AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+    /// GetKillIndices - Return the kill indices.
+    std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+    /// GetDefIndices - Return the define indices.
+    std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+    /// GetRegRefs - Return the RegRefs map.
+    std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+    // GetGroup - Get the group for a register. The returned value is
+    // the index of the GroupNode representing the group.
+    unsigned GetGroup(unsigned Reg);
+
+    // GetGroupRegs - Return a vector of the registers belonging to a
+    // group. If RegRefs is non-NULL then only include referenced registers.
+    void GetGroupRegs(
+       unsigned Group,
+       std::vector<unsigned> &Regs,
+       std::multimap<unsigned,
+         AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+    // UnionGroups - Union Reg1's and Reg2's groups to form a new
+    // group. Return the index of the GroupNode representing the
+    // group.
+    unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+    // LeaveGroup - Remove a register from its current group and place
+    // it alone in its own group. Return the index of the GroupNode
+    // representing the register's new group.
+    unsigned LeaveGroup(unsigned Reg);
+
+    /// IsLive - Return true if Reg is live.
+    bool IsLive(unsigned Reg);
+  };
+
+
+  /// Class AggressiveAntiDepBreaker
+  class AggressiveAntiDepBreaker : public AntiDepBreaker {
+    MachineFunction& MF;
+    MachineRegisterInfo &MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+
+    /// AllocatableSet - The set of allocatable registers.
+    /// We'll be ignoring anti-dependencies on non-allocatable registers,
+    /// because they may not be safe to break.
+    const BitVector AllocatableSet;
+
+    /// CriticalPathSet - The set of registers that should only be
+    /// renamed if they are on the critical path.
+    BitVector CriticalPathSet;
+
+    /// State - The state used to identify and rename anti-dependence
+    /// registers.
+    AggressiveAntiDepState *State;
+
+  public:
+    AggressiveAntiDepBreaker(MachineFunction& MFi,
+                             TargetSubtarget::RegClassVector& CriticalPathRCs);
+    ~AggressiveAntiDepBreaker();
+
+    /// Start - Initialize anti-dep breaking for a new basic block.
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+    /// path of the ScheduleDAG and break them by renaming registers.
+    ///
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
+                                   unsigned InsertPosIndex);
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+    /// Finish - Finish anti-dep breaking for a basic block.
+    void FinishBlock();
+
+  private:
+    typedef std::map<const TargetRegisterClass *,
+                     TargetRegisterClass::const_iterator> RenameOrderType;
+
+    /// IsImplicitDefUse - Return true if MO represents a register
+    /// that is both implicitly used and defined in MI
+    bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+    /// GetPassthruRegs - If MI implicitly def/uses a register, then
+    /// return that register and all subregisters.
+    void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+    void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+                       const char *header =NULL, const char *footer =NULL);
+
+    void PrescanInstruction(MachineInstr *MI, unsigned Count,
+                            std::set<unsigned>& PassthruRegs);
+    void ScanInstruction(MachineInstr *MI, unsigned Count);
+    BitVector GetRenameRegisters(unsigned Reg);
+    bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+                                   RenameOrderType& RenameOrder,
+                                   std::map<unsigned, unsigned> &RenameMap);
+  };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 0000000..e3dd646
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,285 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                                  const unsigned *Indices,
+                                  const unsigned *IndicesEnd,
+                                  unsigned CurIndex) {
+  // Base case: We're done.
+  if (Indices && Indices == IndicesEnd)
+    return CurIndex;
+
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI) {
+      if (Indices && *Indices == unsigned(EI - EB))
+        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Given an array type, recursively traverse the elements.
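+  // For example (illustrative): for {i32, [2 x i32]}, the scalar members
+  // linearize to 0, 1, 2, so indices {1, 1} (the second array element)
+  // produce linear index 2.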
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty->isVoidTy()) + return; + // Base case: we can get an EVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + + if (GV && GV->getName() == "llvm.eh.catch.all.value") { + assert(GV->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = GV->getInitializer(); + GV = dyn_cast<GlobalVariable>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +bool +llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. 
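+/// For example (illustrative): FCMP_OLT yields the ordered SETOLT, but when
+/// NoNaNsFPMath is in effect the NaN-agnostic SETLT is returned instead.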
+/// +ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + llvm_unreachable("Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (NoNaNsFPMath) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. +/// +ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + llvm_unreachable("Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, + const TargetLowering &TLI) { + const Instruction *I = CS.getInstruction(); + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. 
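+  // For example (illustrative): in "call void @f()" followed by
+  // "store i32 0, i32* @g" and "ret void", the store reads or writes
+  // memory between the call and the return, so the call is not in tail
+  // position.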
+  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+      !I->isSafeToSpeculativelyExecute())
+    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+         --BBI) {
+      if (&*BBI == I)
+        break;
+      // Debug info intrinsics do not get in the way of tail call optimization.
+      if (isa<DbgInfoIntrinsic>(BBI))
+        continue;
+      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+          !BBI->isSafeToSpeculativelyExecute())
+        return false;
+    }
+
+  // If the block ends with a void return or unreachable, it doesn't matter
+  // what the call's return type is.
+  if (!Ret || Ret->getNumOperands() == 0) return true;
+
+  // If the return value is undef, it doesn't matter what the call's
+  // return type is.
+  if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Otherwise, make sure the unmodified return value of I is the return value.
+  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+       U = dyn_cast<Instruction>(U->getOperand(0))) {
+    if (!U)
+      return false;
+    if (!U->hasOneUse())
+      return false;
+    if (U == I)
+      break;
+    // Check for a truly no-op truncate.
+    if (isa<TruncInst>(U) &&
+        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+      continue;
+    // Check for a truly no-op bitcast.
+    if (isa<BitCastInst>(U) &&
+        (U->getOperand(0)->getType() == U->getType() ||
+         (U->getOperand(0)->getType()->isPointerTy() &&
+          U->getType()->isPointerTy())))
+      continue;
+    // Otherwise it's not a true no-op.
+    return false;
+  }
+
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 0000000..086b757
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,59 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+  virtual ~AntiDepBreaker();
+
+  /// Start - Initialize anti-dep breaking for a new basic block.
+  virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+  /// BreakAntiDependencies - Identify anti-dependencies within a
+  /// basic-block region and break them by renaming registers. Return
+  /// the number of anti-dependencies broken.
+  ///
+  virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                         MachineBasicBlock::iterator Begin,
+                                         MachineBasicBlock::iterator End,
+                                         unsigned InsertPosIndex) =0;
+
+  /// Observe - Update liveness information to account for the current
+  /// instruction, which will not be scheduled.
+  ///
+  virtual void Observe(MachineInstr *MI, unsigned Count,
+                       unsigned InsertPosIndex) =0;
+
+  /// Finish - Finish anti-dep breaking for a basic block.
+  virtual void FinishBlock() =0;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 0000000..d358ab2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1867 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+  if (P == 0)
+    P = new gcp_map_type();
+  return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
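+/// For example (illustrative): a global with an explicit "align 16" but a
+/// preferred alignment of only 4 bytes yields log2 alignment 4 (16 bytes),
+/// since the larger explicit alignment must be obeyed.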
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, + unsigned InBits = 0) { + unsigned NumBits = 0; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + NumBits = TD.getPreferredAlignmentLog(GVar); + + // If InBits is specified, round it to it. + if (InBits > NumBits) + NumBits = InBits; + + // If the GV has a specified alignment, take it into account. + if (GV->getAlignment() == 0) + return NumBits; + + unsigned GVAlign = Log2_32(GV->getAlignment()); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. + if (GVAlign > NumBits || GV->hasSection()) + NumBits = GVAlign; + return NumBits; +} + + + + +AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) + : MachineFunctionPass(ID), + TM(tm), MAI(tm.getMCAsmInfo()), + OutContext(Streamer.getContext()), + OutStreamer(Streamer), + LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { + DD = 0; DE = 0; MMI = 0; LI = 0; + GCMetadataPrinters = 0; + VerboseAsm = Streamer.isVerboseAsm(); +} + +AsmPrinter::~AsmPrinter() { + assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); + + if (GCMetadataPrinters != 0) { + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + + for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) + delete I->second; + delete &GCMap; + GCMetadataPrinters = 0; + } + + delete &OutStreamer; +} + +/// getFunctionNumber - Return a unique ID for the current function. +/// +unsigned AsmPrinter::getFunctionNumber() const { + return MF->getFunctionNumber(); +} + +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return TM.getTargetLowering()->getObjFileLowering(); +} + + +/// getTargetData - Return information about data layout. +const TargetData &AsmPrinter::getTargetData() const { + return *TM.getTargetData(); +} + +/// getCurrentSection() - Return the current section we are emitting to. +const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer.getCurrentSection(); +} + + + +void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); + if (isVerbose()) + AU.addRequired<MachineLoopInfo>(); +} + +bool AsmPrinter::doInitialization(Module &M) { + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MMI->AnalyzeModule(M); + + // Initialize TargetLoweringObjectFile. + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); + + Mang = new Mangler(OutContext, *TM.getTargetData()); + + // Allow the target to emit any magic that it wants at the start of the file. + EmitStartOfAsmFile(M); + + // Very minimal debug info. It is ignored if we emit actual debug info. If we + // don't, this at least helps the user find where a global came from. + if (MAI->hasSingleParameterDotFile()) { + // .file "foo.c" + OutStreamer.EmitFileDirective(M.getModuleIdentifier()); + } + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + MP->beginAssembly(*this); + + // Emit module-level inline asm if it exists. 
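+  // For example (illustrative): IR containing module-level asm, such as
+  //   module asm ".globl _module_flag"
+  // is emitted verbatim here between the begin/end comments.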
+ if (!M.getModuleInlineAsm().empty()) { + OutStreamer.AddComment("Start of file scope inline assembly"); + OutStreamer.AddBlankLine(); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/); + OutStreamer.AddComment("End of file scope inline assembly"); + OutStreamer.AddBlankLine(); + } + + if (MAI->doesSupportDebugInformation()) + DD = new DwarfDebug(this, &M); + + if (MAI->doesSupportExceptionHandling()) + DE = new DwarfException(this); + + return false; +} + +void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { + switch ((GlobalValue::LinkageTypes)Linkage) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: + if (MAI->getWeakDefDirective() != 0) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + + if ((GlobalValue::LinkageTypes)Linkage != + GlobalValue::LinkerPrivateWeakDefAutoLinkage) + // .weak_definition _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + else + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + } else if (MAI->getLinkOnceDirective() != 0) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + //NOTE: linkonce is handled by the section the symbol was assigned to. + } else { + // .weak _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol. + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + break; + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + case GlobalValue::LinkerPrivateLinkage: + break; + default: + llvm_unreachable("Unknown linkage type!"); + } +} + + +/// EmitGlobalVariable - Emit the specified global variable to the .s file. +void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + if (!GV->hasInitializer()) // External globals require no code. + return; + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GV)) + return; + + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), GV, + /*PrintType=*/false, GV->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + + MCSymbol *GVSym = Mang->getSymbol(GV); + EmitVisibility(GVSym, GV->getVisibility()); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + + const TargetData *TD = TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + unsigned AlignLog = getGVAlignmentLog2(GV, *TD); + + // Handle common and BSS local symbols (.lcomm). + if (GVKind.isCommon() || GVKind.isBSSLocal()) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. 
+ + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), GV, + /*PrintType=*/false, GV->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + + // Handle common symbols. + if (GVKind.isCommon()) { + // .comm _foo, 42, 4 + OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog); + return; + } + + // Handle local BSS symbols. + if (MAI->hasMachoZeroFillDirective()) { + const MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); + // .zerofill __DATA, __bss, _foo, 400, 5 + OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); + return; + } + + if (MAI->hasLCOMMDirective()) { + // .lcomm _foo, 42 + OutStreamer.EmitLocalCommonSymbol(GVSym, Size); + return; + } + + // .local _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); + // .comm _foo, 42, 4 + OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog); + return; + } + + const MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); + + // Handle the zerofill directive on darwin, which is a special form of BSS + // emission. + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. + + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + // .zerofill __DATA, __common, _foo, 400, 5 + OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); + return; + } + + // Handle thread local data for mach-o which requires us to output an + // additional structure of data and mangle the original symbol so that we + // can reference it later. + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + // Emit the .tbss symbol + MCSymbol *MangSym = + OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + + if (GVKind.isThreadBSS()) + OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + else if (GVKind.isThreadData()) { + OutStreamer.SwitchSection(TheSection); + + EmitAlignment(AlignLog, GV); + OutStreamer.EmitLabel(MangSym); + + EmitGlobalConstant(GV->getInitializer()); + } + + OutStreamer.AddBlankLine(); + + // Emit the variable struct for the runtime. + const MCSection *TLVSect + = getObjFileLowering().getTLSExtraDataSection(); + + OutStreamer.SwitchSection(TLVSect); + // Emit the linkage here. + EmitLinkage(GV->getLinkage(), GVSym); + OutStreamer.EmitLabel(GVSym); + + // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer, used when mapped by the runtime + // - pointer to mangled symbol above with initializer + unsigned PtrSize = TD->getPointerSizeInBits()/8; + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + PtrSize, 0); + OutStreamer.EmitIntValue(0, PtrSize, 0); + OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); + + OutStreamer.AddBlankLine(); + return; + } + + OutStreamer.SwitchSection(TheSection); + + EmitLinkage(GV->getLinkage(), GVSym); + EmitAlignment(AlignLog, GV); + + OutStreamer.EmitLabel(GVSym); + + EmitGlobalConstant(GV->getInitializer()); + + if (MAI->hasDotTypeDotSizeDirective()) + // .size foo, 42 + OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + + OutStreamer.AddBlankLine(); +} + +/// EmitFunctionHeader - This method emits the header for the current +/// function. +void AsmPrinter::EmitFunctionHeader() { + // Print out constants referenced by the function + EmitConstantPool(); + + // Print the 'header' of function. 
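+  // The header consists of the section switch plus the visibility, linkage,
+  // alignment and (if supported) ELF .type directives, followed by the
+  // function entry label itself.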
+ const Function *F = MF->getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitVisibility(CurrentFnSym, F->getVisibility()); + + EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitAlignment(MF->getAlignment(), F); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), F, + /*PrintType=*/false, F->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + + // Emit the CurrentFnSym. This is a virtual function to allow targets to + // do their wild and crazy things as required. + EmitFunctionEntryLabel(); + + // If the function had address-taken blocks that got deleted, then we have + // references to the dangling symbols. Emit them at the start of the function + // so that we don't get references to undefined symbols. + std::vector<MCSymbol*> DeadBlockSyms; + MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); + for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + OutStreamer.AddComment("Address taken block that was later removed"); + OutStreamer.EmitLabel(DeadBlockSyms[i]); + } + + // Add some workaround for linkonce linkage on Cygwin\MinGW. + if (MAI->getLinkOnceDirective() != 0 && + (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { + // FIXME: What is this? + MCSymbol *FakeStub = + OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ + CurrentFnSym->getName()); + OutStreamer.EmitLabel(FakeStub); + } + + // Emit pre-function debug and/or EH information. + if (DE) { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->BeginFunction(MF); + } + if (DD) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginFunction(MF); + } +} + +/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the +/// function. This can be overridden by targets as required to do custom stuff. +void AsmPrinter::EmitFunctionEntryLabel() { + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (CurrentFnSym->isUndefined()) + return OutStreamer.EmitLabel(CurrentFnSym); + + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); +} + + +static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << "[ "; + EmitDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + +/// EmitComments - Pretty-print comments for instructions. +static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + DebugLoc DL = MI.getDebugLoc(); + if (!DL.isUnknown()) { // Print source line info. 
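+    // The emitted comment has the form "file.c:42:7", with any inlined-at
+    // locations appended by EmitDebugLoc as "[ file.c:13 ]".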
+ EmitDebugLoc(DL, MF, CommentOS); + CommentOS << '\n'; + } + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. + const MachineMemOperand *MMO; + if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Reload\n"; + } + } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Reload\n"; + } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Spill\n"; + } + } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Spill\n"; + } + + // Check for spill-induced copies + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; +} + +/// EmitImplicitDef - This method emits the specified machine instruction +/// that is an implicit def. +static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { + unsigned RegNo = MI->getOperand(0).getReg(); + AP.OutStreamer.AddComment(Twine("implicit-def: ") + + AP.TM.getRegisterInfo()->getName(RegNo)); + AP.OutStreamer.AddBlankLine(); +} + +static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { + std::string Str = "kill:"; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + assert(Op.isReg() && "KILL instruction must have only register operands"); + Str += ' '; + Str += AP.TM.getRegisterInfo()->getName(Op.getReg()); + Str += (Op.isDef() ? "<def>" : "<kill>"); + } + AP.OutStreamer.AddComment(Str); + AP.OutStreamer.AddBlankLine(); +} + +/// EmitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 3-operand target-independent form. + if (MI->getNumOperands() != 3) + return false; + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + if (V.getContext().isSubprogram()) + OS << DISubprogram(V.getContext()).getDisplayName() << ":"; + OS << V.getName() << " <- "; + + // Register or immediate value. Register 0 means undef. + if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. 
+ bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else { + assert(MI->getOperand(0).isReg() && "Unknown operand type"); + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; + } + OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } + + OS << '+' << MI->getOperand(1).getImm(); + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; +} + +/// EmitFunctionBody - This method emits the body and trailer for a +/// function. +void AsmPrinter::EmitFunctionBody() { + // Emit target-specific gunk before the function body. + EmitFunctionBodyStart(); + + bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); + + // Print out code for the function. + bool HasAnyRealCode = false; + const MachineInstr *LastMI = 0; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + LastMI = II; + + // Print the assembly for the instruction. + if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && + !II->isDebugValue()) { + HasAnyRealCode = true; + ++EmittedInsts; + } + + if (ShouldPrintDebugScopes) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginScope(II); + } + + if (isVerbose()) + EmitComments(*II, OutStreamer.GetCommentOS()); + + switch (II->getOpcode()) { + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); + break; + case TargetOpcode::INLINEASM: + EmitInlineAsm(II); + break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!EmitDebugValueComment(II, *this)) + EmitInstruction(II); + } + break; + case TargetOpcode::IMPLICIT_DEF: + if (isVerbose()) EmitImplicitDef(II, *this); + break; + case TargetOpcode::KILL: + if (isVerbose()) EmitKill(II, *this); + break; + default: + EmitInstruction(II); + break; + } + + if (ShouldPrintDebugScopes) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endScope(II); + } + } + } + + // If the last instruction was a prolog label, then we have a situation where + // we emitted a prolog but no function body. This results in the ending prolog + // label equaling the end of function label and an invalid "row" in the + // FDE. We need to emit a noop in this situation so that the FDE's rows are + // valid. + bool RequiresNoop = LastMI && LastMI->isPrologLabel(); + + // If the function is empty and the object file uses .subsections_via_symbols, + // then we need to emit *something* to the function body to prevent the + // labels from collapsing together. Just emit a noop. + if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) { + MCInst Noop; + TM.getInstrInfo()->getNoopForMachoTarget(Noop); + if (Noop.getOpcode()) { + OutStreamer.AddComment("avoids zero-length function"); + OutStreamer.EmitInstruction(Noop); + } else // Target not mc-ized yet. 
+ OutStreamer.EmitRawText(StringRef("\tnop\n")); + } + + // Emit target-specific gunk after the function body. + EmitFunctionBodyEnd(); + + // If the target wants a .size directive for the size of the function, emit + // it. + if (MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function, so we can get the size as + // difference between the function label and the temp label. + MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(FnEndLabel); + + const MCExpr *SizeExp = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), + MCSymbolRefExpr::Create(CurrentFnSym, OutContext), + OutContext); + OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); + } + + // Emit post-function debug information. + if (DD) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endFunction(MF); + } + if (DE) { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndFunction(); + } + MMI->EndFunction(); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); + + OutStreamer.AddBlankLine(); +} + +/// getDebugValueLocation - Get location information encoded by DBG_VALUE +/// operands. +MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // Target specific DBG_VALUE instructions are handled by each target. + return MachineLocation(); +} + +bool AsmPrinter::doFinalization(Module &M) { + // Emit global variables. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobalVariable(I); + + // Finalize debug and EH information. + if (DE) { + { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndModule(); + } + delete DE; DE = 0; + } + if (DD) { + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endModule(); + } + delete DD; DD = 0; + } + + // If the target wants to know about weak references, print them all. + if (MAI->getWeakRefDirective()) { + // FIXME: This is not lazy, it would be nice to only print weak references + // to stuff that is actually used. Note that doing so would require targets + // to notice uses in operands (due to constant exprs etc). This should + // happen with the MC stuff eventually. + + // Print out module-level global variables here. 
+  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I) {
+    if (!I->hasExternalWeakLinkage()) continue;
+    OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+  }
+
+  for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    if (!I->hasExternalWeakLinkage()) continue;
+    OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+  }
+  }
+
+  if (MAI->hasSetDirective()) {
+    OutStreamer.AddBlankLine();
+    for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+         I != E; ++I) {
+      MCSymbol *Name = Mang->getSymbol(I);
+
+      const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+      MCSymbol *Target = Mang->getSymbol(GV);
+
+      if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+        OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+      else if (I->hasWeakLinkage())
+        OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+      else
+        assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+      EmitVisibility(Name, I->getVisibility());
+
+      // Emit the directives as assignments aka .set:
+      OutStreamer.EmitAssignment(Name,
+                                 MCSymbolRefExpr::Create(Target, OutContext));
+    }
+  }
+
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+  for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+    if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+      MP->finishAssembly(*this);
+
+  // If we don't have any trampolines, then we don't require stack memory
+  // to be executable. Some targets have a directive to declare this.
+  Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+  if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+    if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+      OutStreamer.SwitchSection(S);
+
+  // Allow the target to emit any magic that it wants at the end of the file,
+  // after everything else has gone out.
+  EmitEndOfAsmFile(M);
+
+  delete Mang; Mang = 0;
+  MMI = 0;
+
+  OutStreamer.Finish();
+  return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+  this->MF = &MF;
+  // Get the function symbol.
+  CurrentFnSym = Mang->getSymbol(MF.getFunction());
+
+  if (isVerbose())
+    LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+  // SectionCPs - Keep track of the alignment and the constant pool entries
+  // for each Section.
+  struct SectionCPs {
+    const MCSection *S;
+    unsigned Alignment;
+    SmallVector<unsigned, 4> CPEs;
+    SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+  };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // Calculate sections for constant pool entries. We collect entries that go
+  // into the same section together to reduce the number of section switches.
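+  // Each CPSections element records one target section, the largest alignment
+  // requested by any entry assigned to it, and the indices of those entries;
+  // the emission loop below walks this list.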
+  SmallVector<SectionCPs, 4> CPSections;
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    const MachineConstantPoolEntry &CPE = CP[i];
+    unsigned Align = CPE.getAlignment();
+
+    SectionKind Kind;
+    switch (CPE.getRelocationInfo()) {
+    default: llvm_unreachable("Unknown section kind");
+    case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+    case 1:
+      Kind = SectionKind::getReadOnlyWithRelLocal();
+      break;
+    case 0:
+      switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+      case 4:  Kind = SectionKind::getMergeableConst4(); break;
+      case 8:  Kind = SectionKind::getMergeableConst8(); break;
+      case 16: Kind = SectionKind::getMergeableConst16(); break;
+      default: Kind = SectionKind::getMergeableConst(); break;
+      }
+    }
+
+    const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+    // The number of sections is small, so just do a linear search from the
+    // last section to the first.
+    bool Found = false;
+    unsigned SecIdx = CPSections.size();
+    while (SecIdx != 0) {
+      if (CPSections[--SecIdx].S == S) {
+        Found = true;
+        break;
+      }
+    }
+    if (!Found) {
+      SecIdx = CPSections.size();
+      CPSections.push_back(SectionCPs(S, Align));
+    }
+
+    if (Align > CPSections[SecIdx].Alignment)
+      CPSections[SecIdx].Alignment = Align;
+    CPSections[SecIdx].CPEs.push_back(i);
+  }
+
+  // Now print stuff into the calculated sections.
+  for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+    OutStreamer.SwitchSection(CPSections[i].S);
+    EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+    unsigned Offset = 0;
+    for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+      unsigned CPI = CPSections[i].CPEs[j];
+      MachineConstantPoolEntry CPE = CP[CPI];
+
+      // Emit inter-object padding for alignment.
+      unsigned AlignMask = CPE.getAlignment() - 1;
+      unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+      OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+
+      const Type *Ty = CPE.getType();
+      Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+
+      // Emit the label with a comment on it.
+      if (isVerbose()) {
+        OutStreamer.GetCommentOS() << "constant pool ";
+        WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(),
+                          MF->getFunction()->getParent());
+        OutStreamer.GetCommentOS() << '\n';
+      }
+      OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+      if (CPE.isMachineConstantPoolEntry())
+        EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+      else
+        EmitGlobalConstant(CPE.Val.ConstVal);
+    }
+  }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (MJTI == 0) return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // Pick the directive to use to print the jump table entries, and switch to
+  // the appropriate section.
+  const Function *F = MF->getFunction();
+  bool JTInDiffSection = false;
+  if (// In PIC mode, we need to emit the jump table to the same section as the
+      // function body itself, otherwise the label differences won't make sense.
+      // FIXME: Need a better predicate for this: what about custom entries?
+      MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or the function is
+      // declared in a discardable section.
+      // FIXME: this isn't the right predicate, should be based on the MCSection
+      // for the function.
+      F->isWeakForLinker()) {
+    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+  } else {
+    // Otherwise, drop it in the readonly section.
+    const MCSection *ReadOnlySection =
+      getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+    OutStreamer.SwitchSection(ReadOnlySection);
+    JTInDiffSection = true;
+  }
+
+  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+    // If this jump table was deleted, ignore it.
+    if (JTBBs.empty()) continue;
+
+    // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+    // .set directive for each unique entry. This reduces the number of
+    // relocations the assembler will generate for the jump table.
+    if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+        MAI->hasSetDirective()) {
+      SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+      const TargetLowering *TLI = TM.getTargetLowering();
+      const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+        const MachineBasicBlock *MBB = JTBBs[ii];
+        if (!EmittedSets.insert(MBB)) continue;
+
+        // .set LJTSet, LBB32-base
+        const MCExpr *LHS =
+          MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+        OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+                                MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+      }
+    }
+
+    // On some targets (e.g. Darwin) we want to emit two consecutive labels
+    // before each jump table. The first label is never referenced, but tells
+    // the assembler and linker the extents of the jump table object. The
+    // second label is actually referenced by the code.
+    if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+      // FIXME: This doesn't have to have any specific name, just any randomly
+      // named and numbered 'l' label would work. Simplify GetJTISymbol.
+      OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+    OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+    for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+      EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+  }
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
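+/// Depending on the table's entry kind this is, e.g., a plain ".word LBB0_3"
+/// (EK_BlockAddress), a ".gprel32 LBB0_3" (EK_GPRel32BlockAddress), or a
+/// label difference such as ".word LBB0_3-LJTI0_0" (EK_LabelDifference32).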
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned UID) const { + const MCExpr *Value = 0; + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Inline: + llvm_unreachable("Cannot emit EK_Inline jump table entry"); break; + case MachineJumpTableInfo::EK_Custom32: + Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, + OutContext); + break; + case MachineJumpTableInfo::EK_BlockAddress: + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + break; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: { + // EK_GPRel32BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gprel32 LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_LabelDifference32: { + // EK_LabelDifference32 - Each entry is the address of the block minus + // the address of the jump table. This is used for PIC jump tables where + // gprel32 is not supported. e.g.: + // .word LBB123 - LJTI1_2 + // If the .set directive is supported, this is emitted as: + // .set L4_5_set_123, LBB123 - LJTI1_2 + // .word L4_5_set_123 + + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + if (MAI->hasSetDirective()) { + // If we used .set, reference the .set's symbol. + Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), + OutContext); + break; + } + // Otherwise, use the difference as the jump table entry. + Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext); + Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext); + break; + } + } + + assert(Value && "Unknown entry kind!"); + + unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); + OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); +} + + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") { + if (MAI->hasNoDeadStrip()) // No need to emit this at all. + EmitLLVMUsedList(GV->getInitializer()); + return true; + } + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. 
+ if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + if (GV->getName() == "llvm.global_ctors") { + OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); + EmitAlignment(Align); + EmitXXStructorList(GV->getInitializer()); + + if (TM.getRelocationModel() == Reloc::Static && + MAI->hasStaticCtorDtorReferenceInStaticMode()) { + StringRef Sym(".constructors_used"); + OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym), + MCSA_Reference); + } + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); + EmitAlignment(Align); + EmitXXStructorList(GV->getInitializer()); + + if (TM.getRelocationModel() == Reloc::Static && + MAI->hasStaticCtorDtorReferenceInStaticMode()) { + StringRef Sym(".destructors_used"); + OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym), + MCSA_Reference); + } + return true; + } + + return false; +} + +/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each +/// global in the specified llvm.used list for which emitUsedDirectiveFor +/// is true, as being used with this directive. +void AsmPrinter::EmitLLVMUsedList(Constant *List) { + // Should be an array of 'i8*'. + ConstantArray *InitList = dyn_cast<ConstantArray>(List); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + const GlobalValue *GV = + dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); + if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) + OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip); + } +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the +/// function pointers, ignoring the init priority. +void AsmPrinter::EmitXXStructorList(Constant *List) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa<ConstantArray>(List)) return; + ConstantArray *InitList = cast<ConstantArray>(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1)); + } +} + +//===--------------------------------------------------------------------===// +// Emission and print routines +// + +/// EmitInt8 - Emit a byte directive and value. +/// +void AsmPrinter::EmitInt8(int Value) const { + OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/); +} + +/// EmitInt16 - Emit a short directive and value. +/// +void AsmPrinter::EmitInt16(int Value) const { + OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/); +} + +/// EmitInt32 - Emit a long directive and value. +/// +void AsmPrinter::EmitInt32(int Value) const { + OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/); +} + +/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size +/// in bytes of the directive is specified by Size and Hi/Lo specify the +/// labels. This implicitly uses .set if it is available. 
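+/// With .set available this produces, e.g., ".set Lset0, Lhi-Llo" followed by
+/// ".long Lset0"; otherwise the expression "Hi-Lo" is emitted directly.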
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, + unsigned Size) const { + // Get the Hi-Lo expression. + const MCExpr *Diff = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), + MCSymbolRefExpr::Create(Lo, OutContext), + OutContext); + + if (!MAI->hasSetDirective()) { + OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/); + return; + } + + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); + OutStreamer.EmitAssignment(SetLabel, Diff); + OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); +} + +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// where the size in bytes of the directive is specified by Size and Hi/Lo +/// specify the labels. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, + const MCSymbol *Lo, unsigned Size) + const { + + // Emit Hi+Offset - Lo + // Get the Hi+Offset expression. + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCConstantExpr::Create(Offset, OutContext), + OutContext); + + // Get the Hi+Offset-Lo expression. + const MCExpr *Diff = + MCBinaryExpr::CreateSub(Plus, + MCSymbolRefExpr::Create(Lo, OutContext), + OutContext); + + if (!MAI->hasSetDirective()) + OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); + else { + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); + OutStreamer.EmitAssignment(SetLabel, Diff); + OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); + } +} + +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// where the size in bytes of the directive is specified by Size and Label +/// specifies the label. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, + unsigned Size) + const { + + // Emit Label+Offset + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), + MCConstantExpr::Create(Offset, OutContext), + OutContext); + + OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/); +} + + +//===----------------------------------------------------------------------===// + +// EmitAlignment - Emit an alignment directive to the specified power of +// two boundary. For example, if you pass in 3 here, you will get an 8 +// byte alignment. If a global value is specified, and if that global has +// an explicit alignment requested, it will override the alignment request +// if required for correctness. +// +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); + + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. + + if (getCurrentSection()->getKind().isText()) + OutStreamer.EmitCodeAlignment(1 << NumBits); + else + OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); +} + +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + +/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 
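+/// For example, a GlobalValue lowers to a reference to its mangled symbol,
+/// and a constant GEP lowers to that symbol plus a constant byte offset.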
+/// +static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + + if (CV->isNullValue() || isa<UndefValue>(CV)) + return MCConstantExpr::Create(0, Ctx); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) + return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) + return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + if (CE == 0) { + llvm_unreachable("Unknown constant value to lower!"); + return MCConstantExpr::Create(0, Ctx); + } + + switch (CE->getOpcode()) { + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using TargetData as a + // last resort before giving up. + if (Constant *C = + ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + if (C != CE) + return LowerConstant(C, AP); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } + return MCConstantExpr::Create(0, Ctx); + case Instruction::GetElementPtr: { + const TargetData &TD = *AP.TM.getTargetData(); + // Generate a symbolic expression for the byte address + const Constant *PtrVal = CE->getOperand(0); + SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0], + IdxVec.size()); + + const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); + if (Offset == 0) + return Base; + + // Truncate/sext the offset to the pointer size. + if (TD.getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD.getPointerSizeInBits(); + Offset = (Offset << SExtAmount) >> SExtAmount; + } + + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + Ctx); + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. + // FALL THROUGH. + case Instruction::BitCast: + return LowerConstant(CE->getOperand(0), AP); + + case Instruction::IntToPtr: { + const TargetData &TD = *AP.TM.getTargetData(); + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + false/*ZExt*/); + return LowerConstant(Op, AP); + } + + case Instruction::PtrToInt: { + const TargetData &TD = *AP.TM.getTargetData(); + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + + const MCExpr *OpExpr = LowerConstant(Op, AP); + + // We can emit the pointer value into this slot if the slot is an + // integer slot equal to the size of the pointer. 
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + return OpExpr; + + // Otherwise the pointer is smaller than the resultant integer, mask off + // the high bits so we are sure to get a proper truncation if the input is + // a constant expr. + unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + } + + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); + switch (CE->getOpcode()) { + default: llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + } + } + } +} + +static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, + AsmPrinter &AP); + +static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, + AsmPrinter &AP) { + if (AddrSpace != 0 || !CA->isString()) { + // Not a string. Print the values in successive locations + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + return; + } + + // Otherwise, it can be emitted as .ascii. + SmallVector<char, 128> TmpVec; + TmpVec.reserve(CA->getNumOperands()); + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + + AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace); +} + +static void EmitGlobalConstantVector(const ConstantVector *CV, + unsigned AddrSpace, AsmPrinter &AP) { + for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) + EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); +} + +static void EmitGlobalConstantStruct(const ConstantStruct *CS, + unsigned AddrSpace, AsmPrinter &AP) { + // Print the fields in successive locations. Pad to align if needed! + const TargetData *TD = AP.TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(CS->getType()); + const StructLayout *Layout = TD->getStructLayout(CS->getType()); + uint64_t SizeSoFar = 0; + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + + // Check if padding is needed and insert one or more 0s. + uint64_t FieldSize = TD->getTypeAllocSize(Field->getType()); + uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) + - Layout->getElementOffset(i)) - FieldSize; + SizeSoFar += FieldSize + PadSize; + + // Now print the actual field value. 
+ EmitGlobalConstantImpl(Field, AddrSpace, AP); + + // Insert padding - this may include padding to increase the size of the + // current field up to the ABI size (if the struct is not packed) as well + // as padding to ensure that the next field starts at the right offset. + AP.OutStreamer.EmitZeros(PadSize, AddrSpace); + } + assert(SizeSoFar == Layout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, + AsmPrinter &AP) { + // FP Constants are printed as integer constants to avoid losing + // precision. + if (CFP->getType()->isDoubleTy()) { + if (AP.isVerbose()) { + double Val = CFP->getValueAPF().convertToDouble(); + AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'; + } + + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + return; + } + + if (CFP->getType()->isFloatTy()) { + if (AP.isVerbose()) { + float Val = CFP->getValueAPF().convertToFloat(); + AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'; + } + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); + return; + } + + if (CFP->getType()->isX86_FP80Ty()) { + // all long double variants are printed as hex + // API needed to prevent premature destruction + APInt API = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = API.getRawData(); + if (AP.isVerbose()) { + // Convert to double so we can print the approximate val as a comment. + APFloat DoubleVal = CFP->getValueAPF(); + bool ignored; + DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= " + << DoubleVal.convertToDouble() << '\n'; + } + + if (AP.TM.getTargetData()->isBigEndian()) { + AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); + AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); + } else { + AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); + AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); + } + + // Emit the tail padding for the long double. + const TargetData &TD = *AP.TM.getTargetData(); + AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - + TD.getTypeStoreSize(CFP->getType()), AddrSpace); + return; + } + + assert(CFP->getType()->isPPC_FP128Ty() && + "Floating point constant type not handled"); + // All long double variants are printed as hex + // API needed to prevent premature destruction. + APInt API = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = API.getRawData(); + if (AP.TM.getTargetData()->isBigEndian()) { + AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); + AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); + } else { + AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); + AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); + } +} + +static void EmitGlobalConstantLargeInt(const ConstantInt *CI, + unsigned AddrSpace, AsmPrinter &AP) { + const TargetData *TD = AP.TM.getTargetData(); + unsigned BitWidth = CI->getBitWidth(); + assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); + + // We don't expect assemblers to support integer data directives + // for more than 64 bits, so we emit the data in at most 64-bit + // quantities at a time. + const uint64_t *RawData = CI->getValue().getRawData(); + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + uint64_t Val = TD->isBigEndian() ? 
                                      RawData[e - i - 1] : RawData[i];
+    AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+  }
+}
+
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+                                   AsmPrinter &AP) {
+  if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
+    uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+  }
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    switch (Size) {
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+      if (AP.isVerbose())
+        AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+      AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+      return;
+    default:
+      EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+      return;
+    }
+  }
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+    return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+  if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+    return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+    return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+
+  if (isa<ConstantPointerNull>(CV)) {
+    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+    return;
+  }
+
+  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+    return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+  // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+  // through the streamer with EmitValue.
+  AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+                 AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+                           AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+  uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+  if (Size)
+    EmitGlobalConstantImpl(CV, AddrSpace, *this);
+  else if (MAI->hasSubsectionsViaSymbols()) {
+    // If the global has zero size, emit a single byte so that two labels don't
+    // look like they are at the same location.
+    OutStreamer.EmitIntValue(0, 1, AddrSpace);
+  }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  // Target doesn't support this yet!
+  llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+  if (Offset > 0)
+    OS << '+' << Offset;
+  else if (Offset < 0)
+    OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+                                      Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
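+/// The stem is prefixed with the assembler's private-global prefix (e.g. "L"
+/// on Darwin, ".L" on ELF targets) so the label stays out of the object
+/// file's symbol table.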
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const { + return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+ + Name); +} + + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { + return MMI->getAddrLabelSymbol(BA->getBasicBlock()); +} + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { + return MMI->getAddrLabelSymbol(BB); +} + +/// GetCPISymbol - Return the symbol for the specified constant pool entry. +MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + return OutContext.GetOrCreateSymbol + (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + + "_" + Twine(CPID)); +} + +/// GetJTISymbol - Return the symbol for the specified jump table entry. +MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { + return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate); +} + +/// GetJTSetSymbol - Return the symbol for the specified jump table .set +/// FIXME: privatize to AsmPrinter. +MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + return OutContext.GetOrCreateSymbol + (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); +} + +/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with +/// global value name as its base, with the specified suffix, and where the +/// symbol is forced to have private linkage if ForcePrivate is true. +MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix, + bool ForcePrivate) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, ForcePrivate); + NameStr.append(Suffix.begin(), Suffix.end()); + return OutContext.GetOrCreateSymbol(NameStr.str()); +} + +/// GetExternalSymbolSymbol - Return the MCSymbol for the specified +/// ExternalSymbol. +MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, Sym); + return OutContext.GetOrCreateSymbol(NameStr.str()); +} + + + +/// PrintParentLoopComment - Print comments about parent loops of this one. +static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + if (Loop == 0) return; + PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber); + OS.indent(Loop->getLoopDepth()*2) + << "Parent Loop BB" << FunctionNumber << "_" + << Loop->getHeader()->getNumber() + << " Depth=" << Loop->getLoopDepth() << '\n'; +} + + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. +static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + // Add child loop information + for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){ + OS.indent((*CL)->getLoopDepth()*2) + << "Child Loop BB" << FunctionNumber << "_" + << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth() + << '\n'; + PrintChildLoopComment(OS, *CL, FunctionNumber); + } +} + +/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks. 
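+/// For a loop header this emits comments of the form:
+///   Parent Loop BB0_1 Depth=1
+///   =>  This Inner Loop Header: Depth=2
+/// while a non-header block gets " in Loop: Header=BB0_1 Depth=2".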
+static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+                                       const MachineLoopInfo *LI,
+                                       const AsmPrinter &AP) {
+  // Add loop depth information
+  const MachineLoop *Loop = LI->getLoopFor(&MBB);
+  if (Loop == 0) return;
+
+  MachineBasicBlock *Header = Loop->getHeader();
+  assert(Header && "No header for loop");
+
+  // If this block is not a loop header, just note which block is the loop
+  // header and return.
+  if (Header != &MBB) {
+    AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+                              Twine(AP.getFunctionNumber())+"_" +
+                              Twine(Loop->getHeader()->getNumber())+
+                              " Depth="+Twine(Loop->getLoopDepth()));
+    return;
+  }
+
+  // Otherwise, it is a loop header. Print out information about child and
+  // parent loops.
+  raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+  PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+  OS << "=>";
+  OS.indent(Loop->getLoopDepth()*2-2);
+
+  OS << "This ";
+  if (Loop->empty())
+    OS << "Inner ";
+  OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+  PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+  // Emit an alignment directive for this block, if needed.
+  if (unsigned Align = MBB->getAlignment())
+    EmitAlignment(Log2_32(Align));
+
+  // If the block has its address taken, emit any labels that were used to
+  // reference the block. It is possible that there is more than one label
+  // here, because multiple LLVM BB's may have been RAUW'd to this block after
+  // the references were generated.
+  if (MBB->hasAddressTaken()) {
+    const BasicBlock *BB = MBB->getBasicBlock();
+    if (isVerbose())
+      OutStreamer.AddComment("Block address taken");
+
+    std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+    for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+      OutStreamer.EmitLabel(Syms[i]);
+  }
+
+  // Print the main label for the block.
+  if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+      if (const BasicBlock *BB = MBB->getBasicBlock())
+        if (BB->hasName())
+          OutStreamer.AddComment("%" + BB->getName());
+
+      EmitBasicBlockLoopComments(*MBB, LI, *this);
+
+      // NOTE: Want this comment at start of line, don't emit with AddComment.
+      OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+                              Twine(MBB->getNumber()) + ":");
+    }
+  } else {
+    if (isVerbose()) {
+      if (const BasicBlock *BB = MBB->getBasicBlock())
+        if (BB->hasName())
+          OutStreamer.AddComment("%" + BB->getName());
+      EmitBasicBlockLoopComments(*MBB, LI, *this);
+    }
+
+    OutStreamer.EmitLabel(MBB->getSymbol());
+  }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const {
+  MCSymbolAttr Attr = MCSA_Invalid;
+
+  switch (Visibility) {
+  default: break;
+  case GlobalValue::HiddenVisibility:
+    Attr = MAI->getHiddenVisibilityAttr();
+    break;
+  case GlobalValue::ProtectedVisibility:
+    Attr = MAI->getProtectedVisibilityAttr();
+    break;
+  }
+
+  if (Attr != MCSA_Invalid)
+    OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. + const MachineInstr &LastInst = Pred->back(); + return !LastInst.getDesc().isBarrier(); +} + + + +GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { + if (!S->usesMetadata()) + return 0; + + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + gcp_map_type::iterator GCPI = GCMap.find(S); + if (GCPI != GCMap.end()) + return GCPI->second; + + const char *Name = S->getName().c_str(); + + for (GCMetadataPrinterRegistry::iterator + I = GCMetadataPrinterRegistry::begin(), + E = GCMetadataPrinterRegistry::end(); I != E; ++I) + if (strcmp(Name, I->getName()) == 0) { + GCMetadataPrinter *GMP = I->instantiate(); + GMP->S = S; + GCMap.insert(std::make_pair(S, GMP)); + return GMP; + } + + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); + return 0; +} + diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp new file mode 100644 index 0000000..ce4519c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -0,0 +1,279 @@ +//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Dwarf emissions parts of AsmPrinter. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Dwarf.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Dwarf Emission Helper Routines +//===----------------------------------------------------------------------===// + +/// EmitSLEB128 - emit the specified signed leb128 value. +void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { + if (isVerbose() && Desc) + OutStreamer.AddComment(Desc); + + if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) { + // FIXME: MCize. + OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value)); + return; + } + + // If we don't have .sleb128, emit as .bytes. 
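+  // SLEB128 packs the value into groups of 7 bits, least significant group
+  // first, using bit 7 of each byte as a continuation flag; the value is
+  // sign-extended from bit 6 of the last byte.  For example, the loop below
+  // encodes -2 as the single byte 0x7e and -129 as the two bytes 0xff 0x7e.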
+  int Sign = Value >> (8 * sizeof(Value) - 1);
+  bool IsMore;
+
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+    if (IsMore) Byte |= 0x80;
+    OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+  } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+
+  if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
+    // FIXME: MCize.
+    OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
+    return;
+  }
+
+  // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    if (Value || PadTo != 0) Byte |= 0x80;
+    OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+  } while (Value);
+
+  if (PadTo) {
+    if (PadTo > 1)
+      OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+    OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+  }
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+  if (isVerbose()) {
+    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+      OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+                             Twine(Val-dwarf::DW_CFA_offset) + ")");
+    else
+      OutStreamer.AddComment(dwarf::CallFrameString(Val));
+  }
+  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+  switch (Encoding) {
+  case dwarf::DW_EH_PE_absptr: return "absptr";
+  case dwarf::DW_EH_PE_omit:   return "omit";
+  case dwarf::DW_EH_PE_pcrel:  return "pcrel";
+  case dwarf::DW_EH_PE_udata4: return "udata4";
+  case dwarf::DW_EH_PE_udata8: return "udata8";
+  case dwarf::DW_EH_PE_sdata4: return "sdata4";
+  case dwarf::DW_EH_PE_sdata8: return "sdata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+    return "indirect pcrel udata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+    return "indirect pcrel sdata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+    return "indirect pcrel udata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+    return "indirect pcrel sdata8";
+  }
+
+  return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding.  If verbose assembly output is enabled, we output comments
+/// describing the encoding.  Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+  if (isVerbose()) {
+    if (Desc != 0)
+      OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+                             Twine(DecodeDWARFEncoding(Val)));
+    else
+      OutStreamer.AddComment(Twine("Encoding = ") +
+                             DecodeDWARFEncoding(Val));
+  }
+
+  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
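+/// Only the low three bits of a DW_EH_PE encoding select the data format;
+/// the 0x70 bits describe how the value is applied (e.g. pcrel) and 0x80
+/// marks it indirect, neither of which changes the size.  For example,
+/// DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b) occupies 4 bytes, while
+/// DW_EH_PE_absptr (0x00) is pointer-sized.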
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + default: assert(0 && "Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize(); + case dwarf::DW_EH_PE_udata2: return 2; + case dwarf::DW_EH_PE_udata4: return 4; + case dwarf::DW_EH_PE_udata8: return 8; + } +} + +void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + const MCExpr *Exp = + TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); + OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0); +} + +void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + const MCExpr *Exp = + TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); + OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0); +} + +/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its +/// section. This can be done with a special directive if the target supports +/// it (e.g. cygwin) or by emitting it as an offset from a label at the start +/// of the section. +/// +/// SectionLabel is a temporary label emitted at the start of the section that +/// Label lives in. +void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, + const MCSymbol *SectionLabel) const { + // On COFF targets, we have to emit the special .secrel32 directive. + if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) { + // FIXME: MCize. + OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName())); + return; + } + + // Get the section that we're referring to, based on SectionLabel. + const MCSection &Section = SectionLabel->getSection(); + + // If Label has already been emitted, verify that it is in the same section as + // section label for sanity. + assert((!Label->isInSection() || &Label->getSection() == &Section) && + "Section offset using wrong section base for label"); + + // If the section in question will end up with an address of 0 anyway, we can + // just emit an absolute reference to save a relocation. + if (Section.isBaseAddressKnownZero()) { + OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/); + return; + } + + // Otherwise, emit it as a label difference from the start of the section. + EmitLabelDifference(Label, SectionLabel, 4); +} + +//===----------------------------------------------------------------------===// +// Dwarf Lowering Routines +//===----------------------------------------------------------------------===// + + +/// EmitFrameMoves - Emit frame instructions to describe the layout of the +/// frame. +void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, + MCSymbol *BaseLabel, bool isEH) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + + int stackGrowth = TM.getTargetData()->getPointerSize(); + if (TM.getFrameInfo()->getStackGrowthDirection() != + TargetFrameInfo::StackGrowsUp) + stackGrowth *= -1; + + for (unsigned i = 0, N = Moves.size(); i < N; ++i) { + const MachineMove &Move = Moves[i]; + MCSymbol *Label = Move.getLabel(); + // Throw out move if the label is invalid. + if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // Advance row if new location. 
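+    // The translation below turns each MachineMove into DW_CFA opcodes.  For
+    // illustration, on x86-64 the "push %rbp" of a typical prologue is
+    // described by DW_CFA_def_cfa_offset 16 (bytes 0x0e 0x10) followed by a
+    // DW_CFA_offset for %rbp whose factored offset rides in a ULEB operand
+    // (bytes 0x86 0x02, given the usual data alignment factor of -8).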
+ if (BaseLabel && Label) { + MCSymbol *ThisSym = Label; + if (ThisSym != BaseLabel) { + EmitCFAByte(dwarf::DW_CFA_advance_loc4); + EmitLabelDifference(ThisSym, BaseLabel, 4); + BaseLabel = ThisSym; + } + } + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + + if (Src.getReg() == MachineLocation::VirtualFP) { + EmitCFAByte(dwarf::DW_CFA_def_cfa_offset); + } else { + EmitCFAByte(dwarf::DW_CFA_def_cfa); + EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register"); + } + + EmitULEB128(-Src.getOffset(), "Offset"); + continue; + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + EmitCFAByte(dwarf::DW_CFA_def_cfa_register); + EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register"); + continue; + } + + unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH); + int Offset = Dst.getOffset() / stackGrowth; + + if (Offset < 0) { + EmitCFAByte(dwarf::DW_CFA_offset_extended_sf); + EmitULEB128(Reg, "Reg"); + EmitSLEB128(Offset, "Offset"); + } else if (Reg < 64) { + EmitCFAByte(dwarf::DW_CFA_offset + Reg); + EmitULEB128(Offset, "Offset"); + } else { + EmitCFAByte(dwarf::DW_CFA_offset_extended); + EmitULEB128(Reg, "Reg"); + EmitULEB128(Offset, "Offset"); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp new file mode 100644 index 0000000..df03168 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -0,0 +1,371 @@ +//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the inline assembler pieces of the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Constants.h" +#include "llvm/InlineAsm.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// EmitInlineAsm - Emit a blob of inline asm to the output streamer. +void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { + assert(!Str.empty() && "Can't emit empty inline asm block"); + + // Remember if the buffer is nul terminated or not so we can avoid a copy. + bool isNullTerminated = Str.back() == 0; + if (isNullTerminated) + Str = Str.substr(0, Str.size()-1); + + // If the output streamer is actually a .s file, just emit the blob textually. + // This is useful in case the asm parser doesn't handle something but the + // system assembler does. 
+ if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(Str); + return; + } + + SourceMgr SrcMgr; + + // If the current LLVMContext has an inline asm handler, set it in SourceMgr. + LLVMContext &LLVMCtx = MMI->getModule()->getContext(); + bool HasDiagHandler = false; + if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) { + SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler, + LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie); + HasDiagHandler = true; + } + + MemoryBuffer *Buffer; + if (isNullTerminated) + Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>"); + else + Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>"); + + // Tell SrcMgr about this buffer, it takes ownership of the buffer. + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr, + OutContext, OutStreamer, + *MAI)); + OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM)); + if (!TAP) + report_fatal_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); + Parser->setTargetParser(*TAP.get()); + + // Don't implicitly switch to the text section before the asm. + int Res = Parser->Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); + if (Res && !HasDiagHandler) + report_fatal_error("Error parsing inline asm\n"); +} + + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + unsigned NumOperands = MI->getNumOperands(); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != NumOperands-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + // Don't emit the comments if writing to a .o file. + if (!OutStreamer.hasRawTextSupport()) return; + + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); + return; + } + + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata()) + if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata()) + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + OS << '\t'; + + // The variant of the current asmprinter. 
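+  // Inline asm strings can carry one alternative per assembler dialect using
+  // the $( ... $| ... $) syntax (the counterpart of GCC's {|} construct); the
+  // loop below keeps only the alternative whose index matches this variant.
+  // For example, with dialects 0 and 1, "$(movl $1, $0$|mov $0, $1$)" keeps
+  // roughly "movl $1, $0" for variant 0 and "mov $0, $1" for variant 1,
+  // before the $-operand references are themselves expanded.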
+ int AsmPrinterVariant = MAI->getAssemblerDialect(); + + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. + if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) + OS << '|'; // this is gcc's behavior for | outside a variant + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) + OS << '}'; // this is gcc's behavior for } outside a variant + else + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (HasCurlyBraces && *LastEmitted == ':') { + ++LastEmitted; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (StrEnd == 0) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + ++LastEmitted; // Consume '}' character. 
+ } + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = 2; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + if (OpNo >= MI->getNumOperands()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (Modifier[0] == 'l') // labels are target independent + // FIXME: What if the operand isn't an MBB, report error? + OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); + else { + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, + Modifier[0] ? Modifier : 0, + OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant, + Modifier[0] ? Modifier : 0, OS); + } + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + } + break; + } + } + } + OS << '\n' << (char)0; // null terminate string. + EmitInlineAsm(OS.str(), LocCookie); + + // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); +} + + +/// PrintSpecial - Print information related to the specified machine instr +/// that is independent of the operand, and may be independent of the instr +/// itself. This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, + const char *Code) const { + if (!strcmp(Code, "private")) { + OS << MAI->getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + OS << MAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. + + // If this is a new LastFn instruction, bump the counter. + if (LastMI != MI || LastFn != getFunctionNumber()) { + ++Counter; + LastMI = MI; + LastFn = getFunctionNumber(); + } + OS << Counter; + } else { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + report_fatal_error(Msg.str()); + } +} + +/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM +/// instruction, using the specified assembler variant. Targets should +/// override this to format as appropriate. +bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { + // Target doesn't support this yet! 
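+  // Returning true reports failure to the caller, which then emits the
+  // "invalid operand in inline asm" diagnostic seen above; targets override
+  // this hook (and the memory-operand variant below) to print register,
+  // immediate, and modifier-qualified operands themselves.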
+ return true; +} + +bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + // Target doesn't support this yet! + return true; +} + diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..ca8b843 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_library(LLVMAsmPrinter + AsmPrinter.cpp + AsmPrinterDwarf.cpp + AsmPrinterInlineAsm.cpp + DIE.cpp + DwarfDebug.cpp + DwarfException.cpp + OcamlGCPrinter.cpp + ) + +target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp new file mode 100644 index 0000000..21396ca --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -0,0 +1,368 @@ +//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DIEAbbrevData Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Attribute); + ID.AddInteger(Form); +} + +//===----------------------------------------------------------------------===// +// DIEAbbrev Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Tag); + ID.AddInteger(ChildrenFlag); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) + Data[i].Profile(ID); +} + +/// Emit - Print the abbreviation using the specified asm printer. +/// +void DIEAbbrev::Emit(AsmPrinter *AP) const { + // Emit its Dwarf tag type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(Tag, dwarf::TagString(Tag)); + + // Emit whether it has children DIEs. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(AttrData.getAttribute(), + dwarf::AttributeString(AttrData.getAttribute())); + + // Emit form type. + // FIXME: Doing work even in non-asm-verbose runs. 
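+    // Together with the tag, children flag, and the 0,0 terminator emitted
+    // below, this forms one .debug_abbrev record.  For example, a
+    // DW_TAG_variable abbreviation with no children and a single
+    // DW_AT_name/DW_FORM_string pair is the ULEB sequence
+    // 0x34 0x00 0x03 0x08 0x00 0x00; the abbreviation number itself is
+    // emitted separately by the caller.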
+ AP->EmitULEB128(AttrData.getForm(), + dwarf::FormEncodingString(AttrData.getForm())); + } + + // Mark end of abbreviation. + AP->EmitULEB128(0, "EOM(1)"); + AP->EmitULEB128(0, "EOM(2)"); +} + +#ifndef NDEBUG +void DIEAbbrev::print(raw_ostream &O) { + O << "Abbreviation @" + << format("0x%lx", (long)(intptr_t)this) + << " " + << dwarf::TagString(Tag) + << " " + << dwarf::ChildrenString(ChildrenFlag) + << '\n'; + + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << " " + << dwarf::AttributeString(Data[i].getAttribute()) + << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << '\n'; + } +} +void DIEAbbrev::dump() { print(dbgs()); } +#endif + +//===----------------------------------------------------------------------===// +// DIE Implementation +//===----------------------------------------------------------------------===// + +DIE::~DIE() { + for (unsigned i = 0, N = Children.size(); i < N; ++i) + delete Children[i]; +} + +/// addSiblingOffset - Add a sibling offset field to the front of the DIE. +/// +DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) { + DIEInteger *DI = new (A) DIEInteger(0); + Values.insert(Values.begin(), DI); + Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); + return DI; +} + +#ifndef NDEBUG +void DIE::print(raw_ostream &O, unsigned IncIndent) { + IndentCount += IncIndent; + const std::string Indent(IndentCount, ' '); + bool isBlock = Abbrev.getTag() == 0; + + if (!isBlock) { + O << Indent + << "Die: " + << format("0x%lx", (long)(intptr_t)this) + << ", Offset: " << Offset + << ", Size: " << Size << "\n"; + + O << Indent + << dwarf::TagString(Abbrev.getTag()) + << " " + << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n"; + } else { + O << "Size: " << Size << "\n"; + } + + const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData(); + + IndentCount += 2; + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << Indent; + + if (!isBlock) + O << dwarf::AttributeString(Data[i].getAttribute()); + else + O << "Blk[" << i << "]"; + + O << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << " "; + Values[i]->print(O); + O << "\n"; + } + IndentCount -= 2; + + for (unsigned j = 0, M = Children.size(); j < M; ++j) { + Children[j]->print(O, 4); + } + + if (!isBlock) O << "\n"; + IndentCount -= IncIndent; +} + +void DIE::dump() { + print(dbgs()); +} +#endif + + +#ifndef NDEBUG +void DIEValue::dump() { + print(dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEInteger Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit integer of appropriate size. 
+/// +void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { + unsigned Size = ~0U; + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: Size = 1; break; + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: Size = 2; break; + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: Size = 4; break; + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: Size = 8; break; + case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break; + default: llvm_unreachable("DIE Value form not supported yet"); + } + Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); +} + +/// SizeOf - Determine size of integer value in bytes. +/// +unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: return sizeof(int8_t); + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: return sizeof(int32_t); + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); + default: llvm_unreachable("DIE Value form not supported yet"); break; + } + return 0; +} + +#ifndef NDEBUG +void DIEInteger::print(raw_ostream &O) { + O << "Int: " << (int64_t)Integer + << format(" 0x%llx", (unsigned long long)Integer); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitBytes(Str, /*addrspace*/0); + // Emit nul terminator. + AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); +} + +#ifndef NDEBUG +void DIEString::print(raw_ostream &O) { + O << "Str: \"" << Str << "\""; +} +#endif + +//===----------------------------------------------------------------------===// +// DIELabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return AP->getTargetData().getPointerSize(); +} + +#ifndef NDEBUG +void DIELabel::print(raw_ostream &O) { + O << "Lbl: " << Label->getName(); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of delta value in bytes. 
+/// +unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return AP->getTargetData().getPointerSize(); +} + +#ifndef NDEBUG +void DIEDelta::print(raw_ostream &O) { + O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEEntry Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. +/// +void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->EmitInt32(Entry->getOffset()); +} + +#ifndef NDEBUG +void DIEEntry::print(raw_ostream &O) { + O << format("Die: 0x%lx", (long)(intptr_t)Entry); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEBlock Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the block. +/// +unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { + if (!Size) { + const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); + } + + return Size; +} + +/// EmitValue - Emit block data. +/// +void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { + switch (Form) { + default: assert(0 && "Improper form for block"); break; + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + } + + const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); +} + +/// SizeOf - Determine size of block data in bytes. +/// +unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); break; + } + return 0; +} + +#ifndef NDEBUG +void DIEBlock::print(raw_ostream &O) { + O << "Blk: "; + DIE::print(O, 5); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h new file mode 100644 index 0000000..d56c094 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -0,0 +1,434 @@ +//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. 
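+//
+// DIEAbbrev models a record in the .debug_abbrev section, DIE models a node
+// of the debug information entry tree, and the DIEValue hierarchy
+// (DIEInteger, DIEString, DIELabel, DIEDelta, DIEEntry, DIEBlock) models the
+// attribute values a DIE carries.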
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+  class AsmPrinter;
+  class MCSymbol;
+  class raw_ostream;
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+  /// Dwarf abbreviation.
+  class DIEAbbrevData {
+    /// Attribute - Dwarf attribute code.
+    ///
+    unsigned Attribute;
+
+    /// Form - Dwarf form code.
+    ///
+    unsigned Form;
+  public:
+    DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+    // Accessors.
+    unsigned getAttribute() const { return Attribute; }
+    unsigned getForm() const { return Form; }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+  /// information object.
+  class DIEAbbrev : public FoldingSetNode {
+    /// Tag - Dwarf tag code.
+    ///
+    unsigned Tag;
+
+    /// Unique number for node.
+    ///
+    unsigned Number;
+
+    /// ChildrenFlag - Dwarf children flag.
+    ///
+    unsigned ChildrenFlag;
+
+    /// Data - Raw data bytes for abbreviation.
+    ///
+    SmallVector<DIEAbbrevData, 8> Data;
+
+  public:
+    DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+    virtual ~DIEAbbrev() {}
+
+    // Accessors.
+    unsigned getTag() const { return Tag; }
+    unsigned getNumber() const { return Number; }
+    unsigned getChildrenFlag() const { return ChildrenFlag; }
+    const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+    void setTag(unsigned T) { Tag = T; }
+    void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+    void setNumber(unsigned N) { Number = N; }
+
+    /// AddAttribute - Adds another set of attribute information to the
+    /// abbreviation.
+    void AddAttribute(unsigned Attribute, unsigned Form) {
+      Data.push_back(DIEAbbrevData(Attribute, Form));
+    }
+
+    /// AddFirstAttribute - Adds a set of attribute information to the front
+    /// of the abbreviation.
+    void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+      Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+    }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+
+    /// Emit - Print the abbreviation using the specified asm printer.
+    ///
+    void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+    void print(raw_ostream &O);
+    void dump();
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIE - A structured debug information entry.  Has an abbreviation which
+  /// describes its organization.
+  class DIEValue;
+
+  class DIE {
+  protected:
+    /// Abbrev - Buffer for constructing abbreviation.
+    ///
+    DIEAbbrev Abbrev;
+
+    /// Offset - Offset in debug info section.
+    ///
+    unsigned Offset;
+
+    /// Size - Size of instance + children.
+    ///
+    unsigned Size;
+
+    /// Children DIEs.
+    ///
+    std::vector<DIE *> Children;
+
+    DIE *Parent;
+
+    /// Attribute values.
+ /// + SmallVector<DIEValue*, 32> Values; + + // Private data for print() + mutable unsigned IndentCount; + public: + explicit DIE(unsigned Tag) + : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), + Size(0), Parent (0), IndentCount(0) {} + virtual ~DIE(); + + // Accessors. + DIEAbbrev &getAbbrev() { return Abbrev; } + unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } + unsigned getTag() const { return Abbrev.getTag(); } + unsigned getOffset() const { return Offset; } + unsigned getSize() const { return Size; } + const std::vector<DIE *> &getChildren() const { return Children; } + const SmallVector<DIEValue*, 32> &getValues() const { return Values; } + DIE *getParent() const { return Parent; } + void setTag(unsigned Tag) { Abbrev.setTag(Tag); } + void setOffset(unsigned O) { Offset = O; } + void setSize(unsigned S) { Size = S; } + + /// addValue - Add a value and attributes to a DIE. + /// + void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + Abbrev.AddAttribute(Attribute, Form); + Values.push_back(Value); + } + + /// SiblingOffset - Return the offset of the debug information entry's + /// sibling. + unsigned getSiblingOffset() const { return Offset + Size; } + + /// addSiblingOffset - Add a sibling offset field to the front of the DIE. + /// The caller is responsible for deleting the return value at or after the + /// same time it destroys this DIE. + /// + DIEValue *addSiblingOffset(BumpPtrAllocator &A); + + /// addChild - Add a child to the DIE. + /// + void addChild(DIE *Child) { + if (Child->getParent()) { + assert (Child->getParent() == this && "Unexpected DIE Parent!"); + return; + } + Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); + Children.push_back(Child); + Child->Parent = this; + } + +#ifndef NDEBUG + void print(raw_ostream &O, unsigned IncIndent = 0); + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEValue - A debug information entry value. + /// + class DIEValue { + public: + enum { + isInteger, + isString, + isLabel, + isSectionOffset, + isDelta, + isEntry, + isBlock + }; + protected: + /// Type - Type of data stored in the value. + /// + unsigned Type; + public: + explicit DIEValue(unsigned T) : Type(T) {} + virtual ~DIEValue() {} + + // Accessors + unsigned getType() const { return Type; } + + /// EmitValue - Emit value via the Dwarf writer. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0; + + /// SizeOf - Return the size of a value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *) { return true; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O) = 0; + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEInteger - An integer value DIE. + /// + class DIEInteger : public DIEValue { + uint64_t Integer; + public: + explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} + + /// BestForm - Choose the best form for integer. 
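+    /// For example, an unsigned value of 300 needs DW_FORM_data2, while a
+    /// signed value of -100 still fits in DW_FORM_data1.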
+ /// + static unsigned BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; + if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; + if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; + } + return dwarf::DW_FORM_data8; + } + + /// EmitValue - Emit integer of appropriate size. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + uint64_t getValue() const { return Integer; } + + /// SizeOf - Determine size of integer value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEInteger *) { return true; } + static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEString - A string value DIE. This DIE keeps string reference only. + /// + class DIEString : public DIEValue { + const StringRef Str; + public: + explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} + + /// EmitValue - Emit string value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of string value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const { + return Str.size() + sizeof(char); // sizeof('\0'); + } + + // Implement isa/cast/dyncast. + static bool classof(const DIEString *) { return true; } + static bool classof(const DIEValue *S) { return S->getType() == isString; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIELabel - A label expression DIE. + // + class DIELabel : public DIEValue { + const MCSymbol *Label; + public: + explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} + + /// EmitValue - Emit label value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// getValue - Get MCSymbol. + /// + const MCSymbol *getValue() const { return Label; } + + /// SizeOf - Determine size of label value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIELabel *) { return true; } + static bool classof(const DIEValue *L) { return L->getType() == isLabel; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEDelta - A simple label difference DIE. + /// + class DIEDelta : public DIEValue { + const MCSymbol *LabelHi; + const MCSymbol *LabelLo; + public: + DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) + : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} + + /// EmitValue - Emit delta value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of delta value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. 
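+    // These hooks drive LLVM's lightweight RTTI: dyn_cast<DIEDelta>(V)
+    // returns non-null only when V->getType() == isDelta.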
+    static bool classof(const DIEDelta *) { return true; }
+    static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEEntry - A pointer to another debug information entry.  An instance of
+  /// this class can also be used as a proxy for a debug information entry not
+  /// yet defined (i.e. types.)
+  class DIEEntry : public DIEValue {
+    DIE *const Entry;
+  public:
+    explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+    DIE *getEntry() const { return Entry; }
+
+    /// EmitValue - Emit debug information entry offset.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    /// SizeOf - Determine size of debug information entry in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
+      return sizeof(int32_t);
+    }
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEEntry *) { return true; }
+    static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEBlock - A block of values.  Primarily used for location expressions.
+  //
+  class DIEBlock : public DIEValue, public DIE {
+    unsigned Size;                // Size in bytes excluding size header.
+  public:
+    DIEBlock()
+      : DIEValue(isBlock), DIE(0), Size(0) {}
+    virtual ~DIEBlock() {}
+
+    /// ComputeSize - calculate the size of the block.
+    ///
+    unsigned ComputeSize(AsmPrinter *AP);
+
+    /// BestForm - Choose the best form for data.
+    ///
+    unsigned BestForm() const {
+      if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+      if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+      if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+      return dwarf::DW_FORM_block;
+    }
+
+    /// EmitValue - Emit block data.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    /// SizeOf - Determine size of block data in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEBlock *) { return true; }
+    static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 0000000..c886a5e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,3827 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
+     cl::desc("Print DbgScope information for each machine instruction"));
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+                                              cl::Hidden,
+     cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+     cl::desc("Make an absence of debug location information explicit."),
+     cl::init(false));
+
+namespace {
+  const char *DWARFGroupName = "DWARF Emission";
+  const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+  /// ID - File identifier for source.
+  ///
+  unsigned ID;
+
+  /// CUDie - Compile unit debug information entry.
+  ///
+  const OwningPtr<DIE> CUDie;
+
+  /// IndexTyDie - An anonymous type for index type.  Owned by CUDie.
+  DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+  /// Globals - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE*> Globals;
+
+  /// GlobalTypes - A map of globally visible types for this unit.
+  ///
+  StringMap<DIE*> GlobalTypes;
+
+public:
+  CompileUnit(unsigned I, DIE *D)
+    : ID(I), CUDie(D), IndexTyDie(0) {}
+
+  // Accessors.
+  unsigned getID() const { return ID; }
+  DIE* getCUDie() const { return CUDie.get(); }
+  const StringMap<DIE*> &getGlobals() const { return Globals; }
+  const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+  /// addGlobal - Add a new global entity to the compile unit.
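+  /// The Globals and GlobalTypes maps later drive the .debug_pubnames and
+  /// .debug_pubtypes accelerator tables emitted for this unit.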
+  ///
+  void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+  /// addGlobalType - Add a new global type to the compile unit.
+  ///
+  void addGlobalType(StringRef Name, DIE *Die) {
+    GlobalTypes[Name] = Die;
+  }
+
+  /// getDIE - Returns the debug information entry map slot for the
+  /// specified debug variable.
+  DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+  /// insertDIE - Insert DIE into the map.
+  void insertDIE(const MDNode *N, DIE *D) {
+    MDNodeToDieMap.insert(std::make_pair(N, D));
+  }
+
+  /// getDIEEntry - Returns the debug information entry for the specified
+  /// debug variable.
+  DIEEntry *getDIEEntry(const MDNode *N) {
+    DenseMap<const MDNode *, DIEEntry *>::iterator I =
+      MDNodeToDIEEntryMap.find(N);
+    if (I == MDNodeToDIEEntryMap.end())
+      return NULL;
+    return I->second;
+  }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  /// addDie - Adds or interns the DIE to the compile unit.
+  ///
+  void addDie(DIE *Buffer) {
+    this->CUDie->addChild(Buffer);
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() {
+    return IndexTyDie;
+  }
+
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) {
+    IndexTyDie = D;
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+  DIVariable Var;                    // Variable Descriptor.
+  DIE *TheDIE;                       // Variable DIE.
+  unsigned DotDebugLocOffset;        // Offset in DotDebugLocEntries.
+public:
+  // AbsVar may be NULL.
+  DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
+
+  // Accessors.
+  DIVariable getVariable() const { return Var; }
+  void setDIE(DIE *D) { TheDIE = D; }
+  DIE *getDIE() const { return TheDIE; }
+  void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+  unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+  StringRef getName() const { return Var.getName(); }
+  unsigned getTag() const { return Var.getTag(); }
+  bool variableHasComplexAddress() const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.hasComplexAddress();
+  }
+  bool isBlockByrefVariable() const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.isBlockByrefVariable();
+  }
+  unsigned getNumAddrElements() const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.getNumAddrElements();
+  }
+  uint64_t getAddrElement(unsigned i) const {
+    return Var.getAddrElement(i);
+  }
+  DIType getType() const {
+    DIType Ty = Var.getType();
+    // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+    // addresses instead.
+    if (Var.isBlockByrefVariable()) {
+      /* Byref variables, in Blocks, are declared by the programmer as
+         "SomeType VarName;", but the compiler creates a
+         __Block_byref_x_VarName struct, and gives the variable VarName
+         either the struct, or a pointer to the struct, as its type.  This
+         is necessary for various behind-the-scenes things the compiler
+         needs to do with by-reference variables in blocks.
+
+         However, as far as the original *programmer* is concerned, the
+         variable should still have type 'SomeType', as originally declared.
+
+         The following function dives into the __Block_byref_x_VarName
+         struct to find the original type of the variable.
This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. */ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); + } + return Ty; + } + return Ty; + } +}; + +//===----------------------------------------------------------------------===// +/// DbgRange - This is used to track range of instructions with identical +/// debug info scope. +/// +typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange; + +//===----------------------------------------------------------------------===// +/// DbgScope - This class is used to track scope information. +/// +class DbgScope { + DbgScope *Parent; // Parent to this scope. + DIDescriptor Desc; // Debug info descriptor for scope. + // Location at which this scope is inlined. + AssertingVH<const MDNode> InlinedAtLocation; + bool AbstractScope; // Abstract Scope + const MachineInstr *LastInsn; // Last instruction of this scope. + const MachineInstr *FirstInsn; // First instruction of this scope. + unsigned DFSIn, DFSOut; + // Scopes defined in scope. Contents not owned. + SmallVector<DbgScope *, 4> Scopes; + // Variables declared in scope. Contents owned. + SmallVector<DbgVariable *, 8> Variables; + SmallVector<DbgRange, 4> Ranges; + // Private state for dump() + mutable unsigned IndentLevel; +public: + DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0) + : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), + LastInsn(0), FirstInsn(0), + DFSIn(0), DFSOut(0), IndentLevel(0) {} + virtual ~DbgScope(); + + // Accessors. + DbgScope *getParent() const { return Parent; } + void setParent(DbgScope *P) { Parent = P; } + DIDescriptor getDesc() const { return Desc; } + const MDNode *getInlinedAt() const { return InlinedAtLocation; } + const MDNode *getScopeNode() const { return Desc; } + const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } + const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; } + const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } + + /// openInsnRange - This scope covers instruction range starting from MI. + void openInsnRange(const MachineInstr *MI) { + if (!FirstInsn) + FirstInsn = MI; + + if (Parent) + Parent->openInsnRange(MI); + } + + /// extendInsnRange - Extend the current instruction range covered by + /// this scope. 
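+  /// A sketch of the expected calling protocol (illustrative, inferred from
+  /// the three range methods here): for each instruction MI walked inside a
+  /// scope, the caller invokes openInsnRange(MI) on the first instruction,
+  /// extendInsnRange(MI) on each subsequent one, and closeInsnRange() when
+  /// control transfers to a different scope, which records the accumulated
+  /// [FirstInsn, LastInsn] pair in Ranges.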
+  void extendInsnRange(const MachineInstr *MI) {
+    assert(FirstInsn && "MI Range is not open!");
+    LastInsn = MI;
+    if (Parent)
+      Parent->extendInsnRange(MI);
+  }
+
+  /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected
+  /// until now. This is used when a new scope is encountered while walking
+  /// machine instructions.
+  void closeInsnRange(DbgScope *NewScope = NULL) {
+    assert(LastInsn && "Last insn missing!");
+    Ranges.push_back(DbgRange(FirstInsn, LastInsn));
+    FirstInsn = NULL;
+    LastInsn = NULL;
+    // If Parent dominates NewScope then do not close Parent's instruction
+    // range.
+    if (Parent && (!NewScope || !Parent->dominates(NewScope)))
+      Parent->closeInsnRange(NewScope);
+  }
+
+  void setAbstractScope() { AbstractScope = true; }
+  bool isAbstractScope() const { return AbstractScope; }
+
+  // Depth-first search support to walk and manipulate the DbgScope hierarchy.
+  unsigned getDFSOut() const { return DFSOut; }
+  void setDFSOut(unsigned O) { DFSOut = O; }
+  unsigned getDFSIn() const { return DFSIn; }
+  void setDFSIn(unsigned I) { DFSIn = I; }
+  bool dominates(const DbgScope *S) {
+    if (S == this)
+      return true;
+    if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
+      return true;
+    return false;
+  }
+
+  /// addScope - Add a nested scope to this scope.
+  ///
+  void addScope(DbgScope *S) { Scopes.push_back(S); }
+
+  /// addVariable - Add a variable to the scope.
+  ///
+  void addVariable(DbgVariable *V) { Variables.push_back(V); }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // end llvm namespace
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+  raw_ostream &err = dbgs();
+  err.indent(IndentLevel);
+  const MDNode *N = Desc;
+  N->dump();
+  if (AbstractScope)
+    err << "Abstract Scope\n";
+
+  IndentLevel += 2;
+  if (!Scopes.empty())
+    err << "Children ...\n";
+  for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+    if (Scopes[i] != this)
+      Scopes[i]->dump();
+
+  IndentLevel -= 2;
+}
+#endif
+
+DbgScope::~DbgScope() {
+  for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+    delete Variables[j];
+}
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+  : Asm(A), MMI(Asm->MMI), FirstCU(0),
+    AbbreviationsSet(InitAbbreviationsSetSize),
+    CurrentFnDbgScope(0), PrevLabel(NULL) {
+  NextStringPoolNumber = 0;
+
+  DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+  DwarfStrSectionSym = TextSectionSym = 0;
+  DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+  FunctionBeginSym = FunctionEndSym = 0;
+  DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+  {
+    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+    beginModule(M);
+  }
+}
+
+DwarfDebug::~DwarfDebug() {
+  for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+    DIEBlocks[j]->~DIEBlock();
+}
+
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+  std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+  if (Entry.first) return Entry.first;
+
+  Entry.second = NextStringPoolNumber++;
+  return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+}
+
+
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+  // Profile the node so that we can make it unique.
+  FoldingSetNodeID ID;
+  Abbrev.Profile(ID);
+
+  // Check the set for priors.
+  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+  // If it's newly added.
+  if (InSet == &Abbrev) {
+    // Add to abbreviation list.
+    Abbreviations.push_back(&Abbrev);
+
+    // Assign the vector position + 1 as its number.
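+    // (Illustrative worked example of the numbering, not from the original
+    // code:) the first distinct abbreviation profiled receives number 1, the
+    // second number 2, and so on. Numbering starts at 1 because DWARF
+    // reserves abbreviation code 0 for null entries in .debug_info.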
+ Abbrev.setNumber(Abbreviations.size()); + } else { + // Assign existing abbreviation number. + Abbrev.setNumber(InSet->getNumber()); + } +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// addUInt - Add an unsigned integer attribute data and value. +/// +void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? + DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addSInt - Add an signed integer attribute data and value. +/// +void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, + unsigned Form, int64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. +void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, + StringRef String) { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value. +/// +void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addDelta - Add a label delta attribute data and value. +/// +void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. +/// +void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + +/// addBlock - Add block data. +/// +void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(), + V.getContext().getFilename()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(), + G.getContext().getFilename()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. 
+void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = GetOrCreateSourceID(SP.getDirectory(), + SP.getFilename()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (!Ty.getContext().Verify()) + return; + unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(), + Ty.getContext().getFilename()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + StringRef FN = NS.getFilename(); + StringRef Dir = NS.getDirectory(); + + unsigned FileID = GetOrCreateSourceID(Dir, FN); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based +/// on provided frame index. +void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { + MachineLocation Location; + unsigned FrameReg; + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + Location.set(FrameReg, Offset); + + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + + // Decode the original location, and use that as the start of the byref + // variable's location. 
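+  // (Illustrative example, not part of the original code; register numbers
+  // assumed:) for a variable held in DWARF register 5, the block built below
+  // begins with DW_OP_reg5; for a frame-relative location such as
+  // [reg6 - 8], it begins with DW_OP_breg6 followed by the offset -8.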
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (Location.isReg()) {
+    if (Reg < 32) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+    } else {
+      // Registers >= 32 do not fit the DW_OP_reg0..DW_OP_reg31 range; emit
+      // the variable-length DW_OP_regx form instead, as addAddress does.
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+  } else {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+    else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+  }
+
+  for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) {
+    uint64_t Element = DV->getAddrElement(i);
+
+    if (Element == DIFactory::OpPlus) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+    } else if (Element == DIFactory::OpDeref) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    } else llvm_unreachable("unknown DIFactory Opcode");
+  }
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, 0, Block);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+   VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+   gives the variable VarName either the struct, or a pointer to the struct, as
+   its type. This is necessary for various behind-the-scenes things the
+   compiler needs to do with by-reference variables in Blocks.
+
+   However, as far as the original *programmer* is concerned, the variable
+   should still have type 'SomeType', as originally declared.
+
+   The function getBlockByrefType dives into the __Block_byref_x_VarName
+   struct to find the original type of the variable, which is then assigned to
+   the variable's Debug Information Entry as its real type. So far, so good.
+   However, now the debugger will expect the variable VarName to have the type
+   SomeType. So we need the location attribute for the variable to be an
+   expression that explains to the debugger how to navigate through the
+   pointers and struct to find the actual variable of type SomeType.
+
+   The following function does just that. We start by getting
+   the "normal" location for the variable. This will be the location
+   of either the struct __Block_byref_x_VarName or the pointer to the
+   struct __Block_byref_x_VarName.
+
+   The struct will look something like:
+
+   struct __Block_byref_x_VarName {
+     ... <various fields>
+     struct __Block_byref_x_VarName *forwarding;
+     ... <various other fields>
+     SomeType VarName;
+     ... <maybe more fields>
+   };
+
+   If we are given the struct directly (as our starting point) we
+   need to tell the debugger to:
+
+   1). Add the offset of the forwarding field.
+
+   2). Follow that pointer to get the real __Block_byref_x_VarName
+   struct to use (the real one may have been copied onto the heap).
+
+   3). Add the offset for the field VarName, to find the actual variable.
+
+   If we started with a pointer to the struct, then we need to
+   dereference that pointer first, before the other steps.
+   Translating this into DWARF ops, we will need to append the following
+   to the current location description for the variable:
+
+   DW_OP_deref                    -- optional, if we start with a pointer
+   DW_OP_plus_uconst <forward_fld_offset>
+   DW_OP_deref
+   DW_OP_plus_uconst <varName_fld_offset>
+
+   That is what this function does. */
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read the large comment just above here.
+///
+void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+                                      unsigned Attribute,
+                                      const MachineLocation &Location) {
+  DIType Ty = DV->getType();
+  DIType TmpTy = Ty;
+  unsigned Tag = Ty.getTag();
+  bool isPointer = false;
+
+  StringRef varName = DV->getName();
+
+  if (Tag == dwarf::DW_TAG_pointer_type) {
+    DIDerivedType DTy = DIDerivedType(Ty);
+    TmpTy = DTy.getTypeDerivedFrom();
+    isPointer = true;
+  }
+
+  DICompositeType blockStruct = DICompositeType(TmpTy);
+
+  // Find the __forwarding field and the variable field in the __Block_byref
+  // struct.
+  DIArray Fields = blockStruct.getTypeArray();
+  DIDescriptor varField = DIDescriptor();
+  DIDescriptor forwardingField = DIDescriptor();
+
+  for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Fields.getElement(i);
+    DIDerivedType DT = DIDerivedType(Element);
+    StringRef fieldName = DT.getName();
+    if (fieldName == "__forwarding")
+      forwardingField = Element;
+    else if (fieldName == varName)
+      varField = Element;
+  }
+
+  // Get the offsets for the forwarding field and the variable field.
+  unsigned forwardingFieldOffset =
+    DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+  unsigned varFieldOffset =
+    DIDerivedType(varField).getOffsetInBits() >> 3;
+
+  // Decode the original location, and use that as the start of the byref
+  // variable's location.
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (Location.isReg()) {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+    else {
+      // Registers >= 32 need the variable-length DW_OP_regx form, mirroring
+      // addAddress below.
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+  } else {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+    else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+  }
+
+  // If we started with a pointer to the __Block_byref... struct, then
+  // the first thing we need to do is dereference the pointer (DW_OP_deref).
+  if (isPointer)
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Next add the offset for the '__forwarding' field:
+  // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+  // adding the offset if it's 0.
+  if (forwardingFieldOffset > 0) {
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+  }
+
+  // Now dereference the __forwarding field to get to the real __Block_byref
+  // struct: DW_OP_deref.
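+  // (Illustrative complete expression with assumed offsets, not from the
+  // original code:) for a byref struct at [reg6 - 40], __forwarding at byte
+  // offset 8, and the variable's field at byte offset 24, the finished block
+  // would read:
+  //   DW_OP_breg6 -40, DW_OP_plus_uconst 8, DW_OP_deref, DW_OP_plus_uconst 24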
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. +void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + } else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + } else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + addBlock(Die, Attribute, 0, Block); +} + +/// addRegisterAddress - Add register location entry in variable DIE. +bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS, + const MachineOperand &MO) { + assert (MO.isReg() && "Invalid machine operand!"); + if (!MO.getReg()) + return false; + MachineLocation Location; + Location.set(MO.getReg()); + addAddress(Die, dwarf::DW_AT_location, Location); + if (VS) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS, + const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + if (VS) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, + const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. 
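+  // (Illustrative walk-through of the loop below:) for a 4-byte float on a
+  // little-endian target, Start=0, Stop=4, Incr=1, so bytes 0,1,2,3 are
+  // emitted in order; on a big-endian target, Start=3, Stop=-1, Incr=-1,
+  // emitting 3,2,1,0. Either way the constant appears in the target's
+  // memory byte order.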
+ for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + if (VS) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + + +/// addToContextOwner - Add Die into the list of its context owner's children. +void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); + ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); + ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context), + /*MakeDecl=*/false); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) + ContextDIE->addChild(Die); + else + getCompileUnit(Context)->addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { + CompileUnit *TypeCU = getCompileUnit(Ty); + DIE *TyDIE = TypeCU->getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + TypeCU->insertDIE(Ty, TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty)); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + +/// addType - Add a new type attribute to the specified entity. +void DwarfDebug::addType(DIE *Entity, DIType Ty) { + if (!Ty.Verify()) + return; + + // Check for pre-existence. + CompileUnit *TypeCU = getCompileUnit(Ty); + DIEEntry *Entry = TypeCU->getDIEEntry(Ty); + // If it exists then use the existing value. + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + return; + } + + // Construct type. + DIE *Buffer = getOrCreateTypeDIE(Ty); + + // Set up proxy. + Entry = createDIEEntry(Buffer); + TypeCU->insertDIEEntry(Ty, Entry); + + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. + StringRef Name = BTy.getName(); + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. 
+ if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + // Add prototype flag. + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIDescriptor Ty = Elements.getElement(i); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. + for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) + ElemDie = createSubprogramDIE(DISubprogram(Element)); + else if (Element.isVariable()) { + DIVariable DV(Element); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, DV); + } else if (Element.isDerivedType()) + ElemDie = createMemberDIE(DIDerivedType(Element)); + else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. 
+ if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + addSourceLine(&Buffer, CTy); + } +} + +/// constructSubrangeDIE - Construct subrange DIE from DISubrange. +void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ + int64_t L = SR.getLo(); + int64_t H = SR.getHi(); + DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); + + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + if (L) + addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + + Buffer.addChild(DW_Subrange); +} + +/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. +void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy) { + Buffer.setTag(dwarf::DW_TAG_array_type); + if (CTy->getTag() == dwarf::DW_TAG_vector_type) + addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + + // Emit derived type. + addType(&Buffer, CTy->getTypeDerivedFrom()); + DIArray Elements = CTy->getTypeArray(); + + // Get an anonymous type for index type. + CompileUnit *TheCU = getCompileUnit(*CTy); + DIE *IdxTy = TheCU->getIndexTyDie(); + if (!IdxTy) { + // Construct an anonymous type for index type. + IdxTy = new DIE(dwarf::DW_TAG_base_type); + addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + TheCU->addDie(IdxTy); + TheCU->setIndexTyDie(IdxTy); + } + + // Add subranges to array type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + } +} + +/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. +DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { + DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); + StringRef Name = ETy.getName(); + addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + int64_t Value = ETy.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + return Enumerator; +} + +/// getRealLinkageName - If special LLVM prefix that is used to inform the asm +/// printer to not emit usual symbol prefix before the symbol name is used then +/// return linkage name after skipping this special LLVM prefix. +static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + +/// createMemberDIE - Create new member DIE. 
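+/// The bitfield arithmetic below can be checked with a small worked example
+/// (illustrative, assumed layout): for "unsigned x : 2" at debug-info bit
+/// offset 0, with a 32-bit original type and 32-bit alignment,
+/// HiMark = (0 + 32) & ~31 = 32 and FieldOffset = 0; on a little-endian
+/// target DW_AT_bit_offset then becomes 32 - (0 + 2) = 30, i.e. the DWARF
+/// bit offset counted from the most-significant end of the storage unit.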
+DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
+  DIE *MemberDie = new DIE(DT.getTag());
+  StringRef Name = DT.getName();
+  if (!Name.empty())
+    addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  addType(MemberDie, DT.getTypeDerivedFrom());
+
+  addSourceLine(MemberDie, DT);
+
+  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+  addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+  uint64_t Size = DT.getSizeInBits();
+  uint64_t FieldSize = DT.getOriginalTypeSize();
+
+  if (Size != FieldSize) {
+    // Handle bitfield.
+    addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+    addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+    uint64_t Offset = DT.getOffsetInBits();
+    uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+    uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+    uint64_t FieldOffset = (HiMark - FieldSize);
+    Offset -= FieldOffset;
+
+    // Maybe we need to work from the other end.
+    if (Asm->getTargetData().isLittleEndian())
+      Offset = FieldSize - (Offset + Size);
+    addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+    // Here DW_AT_data_member_location points to the anonymous
+    // field that includes this bit field.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+  } else
+    // This is not a bitfield.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+  if (DT.getTag() == dwarf::DW_TAG_inheritance
+      && DT.isVirtual()) {
+
+    // For C++, virtual base classes are not at a fixed offset. Use the
+    // following expression to extract the appropriate offset from the
+    // vtable:
+    // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+             VBaseLocationDie);
+  } else
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+  if (DT.isProtected())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_private);
+  else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_public);
+  if (DT.isVirtual())
+    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+            dwarf::DW_VIRTUALITY_virtual);
+  return MemberDie;
+}
+
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
+  CompileUnit *SPCU = getCompileUnit(SP);
+  DIE *SPDie = SPCU->getDIE(SP);
+  if (SPDie)
+    return SPDie;
+
+  SPDie = new DIE(dwarf::DW_TAG_subprogram);
+  // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty()) + addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); + + addSourceLine(SPDie, SP); + + // Add prototyped tag, if C or ObjC. + unsigned Lang = SP.getCompileUnit().getLanguage(); + if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || + Lang == dwarf::DW_LANG_ObjC) + addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add Return Type. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + + if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) + addType(SPDie, SPTy); + else + addType(SPDie, DIType(Args.getElement(0))); + + unsigned VK = SP.getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + ContainingTypeMap.insert(std::make_pair(SPDie, + SP.getContainingType())); + } + + if (MakeDecl || !SP.isDefinition()) { + addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled while processing variables. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + addType(Arg, ATy); + if (ATy.isArtificial()) + addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + } + + if (SP.isArtificial()) + addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + + if (!SP.isLocalToUnit()) + addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + if (SP.isOptimized()) + addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + + if (unsigned isa = Asm->getISAEncoding()) { + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + } + + // DW_TAG_inlined_subroutine may refer to this DIE. + SPCU->insertDIE(SP, SPDie); + + // Add to context owner. + addToContextOwner(SPDie, SP.getContext()); + + return SPDie; +} + +DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) { + assert(N && "Invalid Scope encoding!"); + + DbgScope *AScope = AbstractScopes.lookup(N); + if (AScope) + return AScope; + + DbgScope *Parent = NULL; + + DIDescriptor Scope(N); + if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + Parent = getOrCreateAbstractScope(ParentDesc); + } + + AScope = new DbgScope(Parent, DIDescriptor(N), NULL); + + if (Parent) + Parent->addScope(AScope); + AScope->setAbstractScope(); + AbstractScopes[N] = AScope; + if (DIDescriptor(N).isSubprogram()) + AbstractScopesList.push_back(AScope); + return AScope; +} + +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. 
+static bool isSubprogramContext(const MDNode *Context) {
+  if (!Context)
+    return false;
+  DIDescriptor D(Context);
+  if (D.isSubprogram())
+    return true;
+  if (D.isType())
+    return isSubprogramContext(DIType(Context).getContext());
+  return false;
+}
+
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
+  CompileUnit *SPCU = getCompileUnit(SPNode);
+  DIE *SPDie = SPCU->getDIE(SPNode);
+
+  assert(SPDie && "Unable to find subprogram DIE!");
+  DISubprogram SP(SPNode);
+
+  // There is no need to generate a specification DIE for a function defined
+  // at compile unit level. If a function is defined inside another function
+  // then gdb prefers the definition at the top level and does not expect a
+  // specification DIE in the parent function. So avoid creating a
+  // specification DIE for a function defined inside a function.
+  if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+      !SP.getContext().isFile() &&
+      !isSubprogramContext(SP.getContext())) {
+    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+    // Add arguments.
+    DICompositeType SPTy = SP.getType();
+    DIArray Args = SPTy.getTypeArray();
+    unsigned SPTag = SPTy.getTag();
+    if (SPTag == dwarf::DW_TAG_subroutine_type)
+      for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        DIType ATy = DIType(DIType(Args.getElement(i)));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+        SPDie->addChild(Arg);
+      }
+    DIE *SPDeclDie = SPDie;
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+                SPDeclDie);
+    SPCU->addDie(SPDie);
+  }
+
+  // Pick up abstract subprogram DIE.
+  if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+                dwarf::DW_FORM_ref4, AbsSPDIE);
+    SPCU->addDie(SPDie);
+  }
+
+  addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+           Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+  addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+           Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+  addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+  return SPDie;
+}
+
+/// constructLexicalScopeDIE - Construct new DW_TAG_lexical_block
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
+
+  DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+  if (Scope->isAbstractScope())
+    return ScopeDIE;
+
+  const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+  if (Ranges.empty())
+    return 0;
+
+  SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+  if (Ranges.size() > 1) {
+    // .debug_range section has not been laid out yet. Emit offset in
+    // .debug_range as a uint, size 4, for now. emitDIE will handle
+    // DW_AT_ranges appropriately.
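+    // (Illustrative:) with 8-byte pointers and two symbols already pushed on
+    // DebugRangeSymbols, the value emitted below would be 2 * 8 = 16, the
+    // byte offset of this scope's first range entry within the section.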
+ addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); + DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); + } + DebugRangeSymbols.push_back(NULL); + DebugRangeSymbols.push_back(NULL); + return ScopeDIE; + } + + const MCSymbol *Start = getLabelBeforeInsn(RI->first); + const MCSymbol *End = getLabelAfterInsn(RI->second); + + if (End == 0) return 0; + + assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); + assert(End->isDefined() && "Invalid end label for an inlined scope!"); + + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + + return ScopeDIE; +} + +/// constructInlinedScopeDIE - This scope represents inlined body of +/// a function. Construct DIE to represent this concrete inlined copy +/// of the function. +DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { + + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + assert (Ranges.empty() == false + && "DbgScope does not have instruction markers!"); + + // FIXME : .debug_inlined section specification does not clearly state how + // to emit inlined scope that is split into multiple instruction ranges. + // For now, use first instruction range and emit low_pc/high_pc pair and + // corresponding .debug_inlined section entry for this pair. + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + + if (StartLabel == 0 || EndLabel == 0) { + assert (0 && "Unexpected Start and End labels for a inlined scope!"); + return 0; + } + assert(StartLabel->isDefined() && + "Invalid starting label for an inlined scope!"); + assert(EndLabel->isDefined() && + "Invalid end label for an inlined scope!"); + + if (!Scope->getScopeNode()) + return NULL; + DIScope DS(Scope->getScopeNode()); + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); + + DISubprogram InlinedSP = getDISubprogram(DS); + CompileUnit *TheCU = getCompileUnit(InlinedSP); + DIE *OriginDIE = TheCU->getDIE(InlinedSP); + assert(OriginDIE && "Unable to find Origin DIE!"); + addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); + + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + + InlinedSubprogramDIEs.insert(OriginDIE); + + // Track the start label for this inlined function. + DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator + I = InlineInfo.find(InlinedSP); + + if (I == InlineInfo.end()) { + InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, + ScopeDIE)); + InlinedSPNodes.push_back(InlinedSP); + } else + I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); + + DILocation DL(Scope->getInlinedAt()); + addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + + return ScopeDIE; +} + + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { + StringRef Name = DV->getName(); + if (Name.empty()) + return NULL; + + // Translate tag to proper Dwarf tag. 
The result variable is dropped for + // now. + unsigned Tag; + switch (DV->getTag()) { + case dwarf::DW_TAG_return_variable: + return NULL; + case dwarf::DW_TAG_arg_variable: + Tag = dwarf::DW_TAG_formal_parameter; + break; + case dwarf::DW_TAG_auto_variable: // fall thru + default: + Tag = dwarf::DW_TAG_variable; + break; + } + + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); + + DIE *AbsDIE = NULL; + DenseMap<const DbgVariable *, const DbgVariable *>::iterator + V2AVI = VarToAbstractVarMap.find(DV); + if (V2AVI != VarToAbstractVarMap.end()) + AbsDIE = V2AVI->second->getDIE(); + + if (AbsDIE) + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); + else { + addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addSourceLine(VariableDie, DV->getVariable()); + + // Add variable type. + addType(VariableDie, DV->getType()); + } + + if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) + addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + + if (Scope->isAbstractScope()) { + DV->setDIE(VariableDie); + return VariableDie; + } + + // Add variable address. + + unsigned Offset = DV->getDotDebugLocOffset(); + if (Offset != ~0U) { + addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("debug_loc", Offset)); + DV->setDIE(VariableDie); + UseDotDebugLocEntry.insert(VariableDie); + return VariableDie; + } + + // Check if variable is described by a DBG_VALUE instruction. + DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI = + DbgVariableToDbgInstMap.find(DV); + if (DVI != DbgVariableToDbgInstMap.end()) { + const MachineInstr *DVInsn = DVI->second; + const MCSymbol *DVLabel = findVariableLabel(DV); + bool updated = false; + // FIXME : Handle getNumOperands != 3 + if (DVInsn->getNumOperands() == 3) { + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (DVInsn->getOperand(1).isImm() && + TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { + addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); + updated = true; + } else + updated = addRegisterAddress(VariableDie, DVLabel, RegOp); + } + else if (DVInsn->getOperand(0).isImm()) + updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isFPImm()) + updated = + addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + } else { + MachineLocation Location = Asm->getDebugValueLocation(DVInsn); + if (Location.getReg()) { + addAddress(VariableDie, dwarf::DW_AT_location, Location); + if (DVLabel) + addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + DVLabel); + updated = true; + } + } + if (!updated) { + // If variableDie is not updated then DBG_VALUE instruction does not + // have valid variable info. + delete VariableDie; + return NULL; + } + DV->setDIE(VariableDie); + return VariableDie; + } + + // .. else use frame index, if available. 
+ int FI = 0; + if (findVariableFrameIndex(DV, &FI)) + addVariableAddress(DV, VariableDie, FI); + + DV->setDIE(VariableDie); + return VariableDie; + +} + +void DwarfDebug::addPubTypes(DISubprogram SP) { + DICompositeType SPTy = SP.getType(); + unsigned SPTag = SPTy.getTag(); + if (SPTag != dwarf::DW_TAG_subroutine_type) + return; + + DIArray Args = SPTy.getTypeArray(); + for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { + DIType ATy(Args.getElement(i)); + if (!ATy.Verify()) + continue; + DICompositeType CATy = getDICompositeType(ATy); + if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() + && !CATy.isForwardDecl()) { + CompileUnit *TheCU = getCompileUnit(CATy); + if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) + TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); + } + } +} + +/// constructScopeDIE - Construct a DIE for this scope. +DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { + if (!Scope || !Scope->getScopeNode()) + return NULL; + + DIScope DS(Scope->getScopeNode()); + DIE *ScopeDIE = NULL; + if (Scope->getInlinedAt()) + ScopeDIE = constructInlinedScopeDIE(Scope); + else if (DS.isSubprogram()) { + ProcessedSPNodes.insert(DS); + if (Scope->isAbstractScope()) { + ScopeDIE = getCompileUnit(DS)->getDIE(DS); + // Note down abstract DIE. + if (ScopeDIE) + AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); + } + else + ScopeDIE = updateSubprogramScopeDIE(DS); + } + else + ScopeDIE = constructLexicalScopeDIE(Scope); + if (!ScopeDIE) return NULL; + + // Add variables to scope. + const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); + if (VariableDIE) + ScopeDIE->addChild(VariableDIE); + } + + // Add nested scopes. + const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { + // Define the Scope debug information entry. + DIE *NestedDIE = constructScopeDIE(Scopes[j]); + if (NestedDIE) + ScopeDIE->addChild(NestedDIE); + } + + if (DS.isSubprogram()) + addPubTypes(DISubprogram(DS)); + + return ScopeDIE; +} + +/// GetOrCreateSourceID - Look up the source id with the given directory and +/// source file names. If none currently exists, create a new id and insert it +/// in the SourceIds map. This can update DirectoryNames and SourceFileNames +/// maps as well. +unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){ + unsigned DId; + assert (DirName.empty() == false && "Invalid directory name!"); + + StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); + if (DI != DirectoryIdMap.end()) { + DId = DI->getValue(); + } else { + DId = DirectoryNames.size() + 1; + DirectoryIdMap[DirName] = DId; + DirectoryNames.push_back(DirName); + } + + unsigned FId; + StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName); + if (FI != SourceFileIdMap.end()) { + FId = FI->getValue(); + } else { + FId = SourceFileNames.size() + 1; + SourceFileIdMap[FileName] = FId; + SourceFileNames.push_back(FileName); + } + + DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI = + SourceIdMap.find(std::make_pair(DId, FId)); + if (SI != SourceIdMap.end()) + return SI->second; + + unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0. + SourceIdMap[std::make_pair(DId, FId)] = SrcId; + SourceIds.push_back(std::make_pair(DId, FId)); + + return SrcId; +} + +/// getOrCreateNameSpace - Create a DIE for DINameSpace. 
+DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
+  CompileUnit *TheCU = getCompileUnit(NS);
+  DIE *NDie = TheCU->getDIE(NS);
+  if (NDie)
+    return NDie;
+  NDie = new DIE(dwarf::DW_TAG_namespace);
+  TheCU->insertDIE(NS, NDie);
+  if (!NS.getName().empty())
+    addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+  addSourceLine(NDie, NS);
+  addToContextOwner(NDie, NS.getContext());
+  return NDie;
+}
+
+/// constructCompileUnit - Create new CompileUnit for the given
+/// metadata node with tag DW_TAG_compile_unit.
+void DwarfDebug::constructCompileUnit(const MDNode *N) {
+  DICompileUnit DIUnit(N);
+  StringRef FN = DIUnit.getFilename();
+  StringRef Dir = DIUnit.getDirectory();
+  unsigned ID = GetOrCreateSourceID(Dir, FN);
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+            DIUnit.getProducer());
+  addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+          DIUnit.getLanguage());
+  addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+  // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
+  // simplifies debug range entries.
+  addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+  // DW_AT_stmt_list is an offset of line number information for this
+  // compile unit in the debug_line section.
+  if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
+    addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+             Asm->GetTempSymbol("section_line"));
+  else
+    addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+  if (!Dir.empty())
+    addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+  if (DIUnit.isOptimized())
+    addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+  StringRef Flags = DIUnit.getFlags();
+  if (!Flags.empty())
+    addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+  unsigned RVer = DIUnit.getRunTimeVersion();
+  if (RVer)
+    addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+            dwarf::DW_FORM_data1, RVer);
+
+  CompileUnit *NewCU = new CompileUnit(ID, Die);
+  if (!FirstCU)
+    FirstCU = NewCU;
+  CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompileUnit - Get CompileUnit DIE.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+  assert(N && "Invalid DwarfDebug::getCompileUnit argument!");
+  DIDescriptor D(N);
+  const MDNode *CUNode = NULL;
+  if (D.isCompileUnit())
+    CUNode = N;
+  else if (D.isSubprogram())
+    CUNode = DISubprogram(N).getCompileUnit();
+  else if (D.isType())
+    CUNode = DIType(N).getCompileUnit();
+  else if (D.isGlobalVariable())
+    CUNode = DIGlobalVariable(N).getCompileUnit();
+  else if (D.isVariable())
+    CUNode = DIVariable(N).getCompileUnit();
+  else if (D.isNameSpace())
+    CUNode = DINameSpace(N).getCompileUnit();
+  else if (D.isFile())
+    CUNode = DIFile(N).getCompileUnit();
+  else
+    return FirstCU;
+
+  DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+    = CUMap.find(CUNode);
+  if (I == CUMap.end())
+    return FirstCU;
+  return I->second;
+}
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+  DIDerivedType DTy(Ty);
+  if (DTy.Verify())
+    return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+  DIBasicType BTy(Ty);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
+  DIGlobalVariable GV(N);
+
+  // If debug information is malformed then ignore it.
+  if (!GV.Verify())
+    return;
+
+  // Check for pre-existence.
+  CompileUnit *TheCU = getCompileUnit(N);
+  if (TheCU->getDIE(GV))
+    return;
+
+  DIType GTy = GV.getType();
+  DIE *VariableDIE = new DIE(GV.getTag());
+
+  bool isGlobalVariable = GV.getGlobal() != NULL;
+
+  // Add name.
+  addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+            GV.getDisplayName());
+  StringRef LinkageName = GV.getLinkageName();
+  if (!LinkageName.empty() && isGlobalVariable)
+    addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+              dwarf::DW_FORM_string, getRealLinkageName(LinkageName));
+  // Add type.
+  addType(VariableDIE, GTy);
+  if (GTy.isCompositeType() && !GTy.getName().empty()
+      && !GTy.isForwardDecl()) {
+    DIEEntry *Entry = TheCU->getDIEEntry(GTy);
+    assert(Entry && "Missing global type!");
+    TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
+  }
+  // Add scoping info.
+  if (!GV.isLocalToUnit()) {
+    addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    // Expose as global.
+    TheCU->addGlobal(GV.getName(), VariableDIE);
+  }
+  // Add line number info.
+  addSourceLine(VariableDIE, GV);
+  // Add to map.
+  TheCU->insertDIE(N, VariableDIE);
+  // Add to context owner.
+  DIDescriptor GVContext = GV.getContext();
+  addToContextOwner(VariableDIE, GVContext);
+  // Add location.
+  if (isGlobalVariable) {
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(GV.getGlobal()));
+    // Do not create specification DIE if context is either compile unit
+    // or a subprogram.
+    if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+        !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+      // Create specification DIE.
+      DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+      addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+                  dwarf::DW_FORM_ref4, VariableDIE);
+      addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+      addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      TheCU->addDie(VariableSpecDIE);
+    } else {
+      addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+    }
+  } else if (Constant *C = GV.getConstant()) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+      if (isUnsignedDIType(GTy))
+        addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+                CI->getZExtValue());
+      else
+        addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+                CI->getSExtValue());
+    }
+  }
+  return;
+}
+
+/// constructSubprogramDIE - Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
+  DISubprogram SP(N);
+
+  // Check for pre-existence.
+  CompileUnit *TheCU = getCompileUnit(N);
+  if (TheCU->getDIE(N))
+    return;
+
+  if (!SP.isDefinition())
+    // This is a method declaration which will be handled while constructing
+    // class type.
+    return;
+
+  DIE *SubprogramDie = createSubprogramDIE(SP);
+
+  // Add to map.
+  TheCU->insertDIE(N, SubprogramDie);
+
+  // Add to context owner.
+  addToContextOwner(SubprogramDie, SP.getContext());
+
+  // Expose as global.
+  TheCU->addGlobal(SP.getName(), SubprogramDie);
+
+  return;
+}
+
+/// beginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
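+/// Besides what DebugInfoFinder digs out of the IR, this era of the emitter
+/// also reads named metadata: "llvm.dbg.enum" for enum types here, and
+/// per-function variable lists under "llvm.dbg.lv.<function>" later on.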
+void DwarfDebug::beginModule(Module *M) {
+  if (DisableDebugInfoPrinting)
+    return;
+
+  DebugInfoFinder DbgFinder;
+  DbgFinder.processModule(*M);
+
+  bool HasDebugInfo = false;
+
+  // Scan all the compile units to see if any is marked as the main unit.
+  // If not, we do not generate debug info.
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I) {
+    if (DICompileUnit(*I).isMain()) {
+      HasDebugInfo = true;
+      break;
+    }
+  }
+
+  if (!HasDebugInfo) return;
+
+  // Tell MMI that we have debug info.
+  MMI->setDebugInfoAvailability(true);
+
+  // Emit initial sections.
+  EmitSectionLabels();
+
+  // Create all the compile unit DIEs.
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I)
+    constructCompileUnit(*I);
+
+  // Create DIEs for each subprogram.
+  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+       E = DbgFinder.subprogram_end(); I != E; ++I)
+    constructSubprogramDIE(*I);
+
+  // Create DIEs for each global variable.
+  for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+       E = DbgFinder.global_variable_end(); I != E; ++I)
+    constructGlobalVariableDIE(*I);
+
+  // Create type DIEs for each enum type named in "llvm.dbg.enum" metadata.
+  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
+  // Prime section data.
+  SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+
+  // Print out .file directives to specify files for .loc directives. These are
+  // printed out early so that they precede any .loc directives.
+  if (Asm->MAI->hasDotLocAndDotFile()) {
+    for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
+      // Remember source id starts at 1.
+      std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
+      // FIXME: don't use sys::path for this! This should not depend on the
+      // host.
+      sys::Path FullPath(getSourceDirectoryName(Id.first));
+      bool AppendOk =
+        FullPath.appendComponent(getSourceFileName(Id.second));
+      assert(AppendOk && "Could not append filename to directory!");
+      AppendOk = false;  // Keep AppendOk "used" when asserts are compiled out.
+      Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
+    }
+  }
+}
+
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+  if (!FirstCU) return;
+  const Module *M = MMI->getModule();
+  DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
+  if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+    for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+      if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+      DISubprogram SP(AllSPs->getOperand(SI));
+      if (!SP.Verify()) continue;
+
+      // Collect info for variables that were optimized out.
+      if (!SP.isDefinition()) continue;
+      StringRef FName = SP.getLinkageName();
+      if (FName.empty())
+        FName = SP.getName();
+      NamedMDNode *NMD =
+        M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
+      if (!NMD) continue;
+      unsigned E = NMD->getNumOperands();
+      if (!E) continue;
+      DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+      DeadFnScopeMap[SP] = Scope;
+      for (unsigned I = 0; I != E; ++I) {
+        DIVariable DV(NMD->getOperand(I));
+        if (!DV.Verify()) continue;
+        Scope->addVariable(new DbgVariable(DV));
+      }
+
+      // Construct subprogram DIE and add variables DIEs.
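+      // (These subprograms were never seen during codegen, they are only
+      // reachable from "llvm.dbg.sp", so the DIE is built purely from
+      // metadata.)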
+      constructSubprogramDIE(SP);
+      DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
+      const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+      for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+        DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+        if (VariableDIE)
+          ScopeDIE->addChild(VariableDIE);
+      }
+    }
+  }
+
+  // Attach DW_AT_inline attribute to inlined subprogram DIEs.
+  for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+       AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+    DIE *ISP = *AI;
+    addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+  }
+
+  for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+       CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+    DIE *SPDie = CI->first;
+    const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+    if (!N) continue;
+    DIE *NDie = getCompileUnit(N)->getDIE(N);
+    if (!NDie) continue;
+    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+  }
+
+  // Emit final addresses for the standard sections.
+  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+  // End text sections.
+  for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+    Asm->OutStreamer.SwitchSection(SectionMap[i]);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+  }
+
+  // Emit common frame information.
+  emitCommonDebugFrame();
+
+  // Emit function debug frame information.
+  for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+       E = DebugFrames.end(); I != E; ++I)
+    emitFunctionDebugFrame(*I);
+
+  // Compute DIE offsets and sizes.
+  computeSizeAndOffsets();
+
+  // Emit all the DIEs into a debug info section.
+  emitDebugInfo();
+
+  // Corresponding abbreviations into an abbrev section.
+  emitAbbreviations();
+
+  // Emit source line correspondence into a debug line section.
+  emitDebugLines();
+
+  // Emit info into a debug pubnames section.
+  emitDebugPubNames();
+
+  // Emit info into a debug pubtypes section.
+  emitDebugPubTypes();
+
+  // Emit info into a debug loc section.
+  emitDebugLoc();
+
+  // Emit info into a debug aranges section.
+  EmitDebugARanges();
+
+  // Emit info into a debug ranges section.
+  emitDebugRanges();
+
+  // Emit info into a debug macinfo section.
+  emitDebugMacInfo();
+
+  // Emit inline info.
+  emitDebugInlineInfo();
+
+  // Emit info into a debug str section.
+  emitDebugStr();
+
+  // Clean up.
+  DeleteContainerSeconds(DeadFnScopeMap);
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+       E = CUMap.end(); I != E; ++I)
+    delete I->second;
+  FirstCU = NULL;  // Reset for the next Module, if any.
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+                                              DebugLoc ScopeLoc) {
+  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+  if (AbsDbgVariable)
+    return AbsDbgVariable;
+
+  LLVMContext &Ctx = Var->getContext();
+  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
+  if (!Scope)
+    return NULL;
+
+  AbsDbgVariable = new DbgVariable(Var);
+  Scope->addVariable(AbsDbgVariable);
+  AbstractVariables[Var] = AbsDbgVariable;
+  return AbsDbgVariable;
+}
+
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
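+/// Each table entry pairs a variable's metadata with a (frame index,
+/// DebugLoc); the index is recorded for the concrete variable and, when one
+/// exists, for its abstract (inlined) counterpart as well.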
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
+                                   SmallPtrSet<const MDNode *, 16> &Processed) {
+  const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+       VE = VMap.end(); VI != VE; ++VI) {
+    const MDNode *Var = VI->first;
+    if (!Var) continue;
+    Processed.insert(Var);
+    DIVariable DV(Var);
+    const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+    DbgScope *Scope = 0;
+    if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
+      Scope = ConcreteScopes.lookup(IA);
+    if (Scope == 0)
+      Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+
+    // If variable scope is not found then skip this variable.
+    if (Scope == 0)
+      continue;
+
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+    DbgVariable *RegVar = new DbgVariable(DV);
+    recordVariableFrameIndex(RegVar, VP.first);
+    Scope->addVariable(RegVar);
+    if (AbsDbgVariable) {
+      recordVariableFrameIndex(AbsDbgVariable, VP.first);
+      VarToAbstractVarMap[RegVar] = AbsDbgVariable;
+    }
+  }
+}
+
+/// isDbgValueInUndefinedReg - Return true if the debug value, encoded by a
+/// DBG_VALUE instruction, is in an undefined reg.
+static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
+  assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+  return MI->getOperand(0).isReg() && !MI->getOperand(0).getReg();
+}
+
+/// isDbgValueInDefinedReg - Return true if the debug value, encoded by a
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+  assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+  return MI->getOperand(0).isReg() && MI->getOperand(0).getReg();
+}
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+                                SmallPtrSet<const MDNode *, 16> &Processed) {
+  // Collect info from the side table maintained by MMI.
+  collectVariableInfoFromMMITable(MF, Processed);
+
+  SmallVector<const MachineInstr *, 8> DbgValues;
+  // Collect variable information from DBG_VALUE machine instructions.
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
+        continue;
+      DbgValues.push_back(MInsn);
+    }
+
+  // This is a collection of DBG_VALUE instructions describing the same
+  // variable.
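+  // (Operand layout assumed from the accessors used here: operand 0 holds
+  // the register or memory operand and the last operand is the DIVariable
+  // metadata, i.e. roughly `DBG_VALUE %reg, offset, !dbg-var`.)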
+  SmallVector<const MachineInstr *, 4> MultipleValues;
+  for (SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(),
+       E = DbgValues.end(); I != E; ++I) {
+    const MachineInstr *MInsn = *I;
+    MultipleValues.clear();
+    if (isDbgValueInDefinedReg(MInsn))
+      MultipleValues.push_back(MInsn);
+    DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata());
+    if (Processed.count(DV) != 0)
+      continue;
+
+    const MachineInstr *PrevMI = MInsn;
+    for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
+         ME = DbgValues.end(); MI != ME; ++MI) {
+      const MDNode *Var =
+        (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
+      if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+          !PrevMI->isIdenticalTo(*MI))
+        MultipleValues.push_back(*MI);
+      PrevMI = *MI;
+    }
+
+    DbgScope *Scope = findDbgScope(MInsn);
+    bool CurFnArg = false;
+    if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+        DISubprogram(DV.getContext()).describes(MF->getFunction()))
+      CurFnArg = true;
+    if (!Scope && CurFnArg)
+      Scope = CurrentFnDbgScope;
+    // If variable scope is not found then skip this variable.
+    if (!Scope)
+      continue;
+
+    Processed.insert(DV);
+    DbgVariable *RegVar = new DbgVariable(DV);
+    Scope->addVariable(RegVar);
+    if (!CurFnArg)
+      DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
+    if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+      DbgVariableToDbgInstMap[AbsVar] = MInsn;
+      VarToAbstractVarMap[RegVar] = AbsVar;
+    }
+    if (MultipleValues.size() <= 1) {
+      DbgVariableToDbgInstMap[RegVar] = MInsn;
+      continue;
+    }
+
+    // Handle multiple DBG_VALUE instructions describing one variable.
+    if (DotDebugLocEntries.empty())
+      RegVar->setDotDebugLocOffset(0);
+    else
+      RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+    const MachineInstr *Begin = NULL;
+    const MachineInstr *End = NULL;
+    for (SmallVector<const MachineInstr *, 4>::iterator
+         MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+         MVI != MVE; ++MVI) {
+      if (!Begin) {
+        Begin = *MVI;
+        continue;
+      }
+      End = *MVI;
+      MachineLocation MLoc;
+      if (Begin->getNumOperands() == 3) {
+        if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+          MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+      } else
+        MLoc = Asm->getDebugValueLocation(Begin);
+
+      const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+      const MCSymbol *SLabel = getLabelBeforeInsn(End);
+      if (MLoc.getReg())
+        DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+
+      Begin = End;
+      if (MVI + 1 == MVE) {
+        // If End is the last instruction then its value is valid
+        // until the end of the function.
+        MachineLocation EMLoc;
+        if (End->getNumOperands() == 3) {
+          if (End->getOperand(0).isReg() && End->getOperand(1).isImm())
+            EMLoc.set(End->getOperand(0).getReg(), End->getOperand(1).getImm());
+        } else
+          EMLoc = Asm->getDebugValueLocation(End);
+        if (EMLoc.getReg())
+          DotDebugLocEntries.push_back(
+            DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc));
+      }
+    }
+    DotDebugLocEntries.push_back(DotDebugLocEntry());
+  }
+
+  // Collect info for variables that were optimized out.
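+  // ("Optimized out" here means the variable survives only in the
+  // module-level "llvm.dbg.lv.<function>" named metadata; no DBG_VALUE or
+  // MMI-table entry is left for it.)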
+  const Function *F = MF->getFunction();
+  const Module *M = F->getParent();
+  if (NamedMDNode *NMD =
+      M->getNamedMetadata(Twine("llvm.dbg.lv.",
+                                getRealLinkageName(F->getName())))) {
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+      if (!DV || !Processed.insert(DV))
+        continue;
+      DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+      if (Scope)
+        Scope->addVariable(new DbgVariable(DV));
+    }
+  }
+}
+
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+    LabelsBeforeInsn.find(MI);
+  if (I == LabelsBeforeInsn.end())
+    // FunctionBeginSym always precedes all the instructions in the current
+    // function.
+    return FunctionBeginSym;
+  return I->second;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+    LabelsAfterInsn.find(MI);
+  if (I == LabelsAfterInsn.end())
+    return NULL;
+  return I->second;
+}
+
+/// beginScope - Process beginning of a scope.
+void DwarfDebug::beginScope(const MachineInstr *MI) {
+  if (InsnNeedsLabel.count(MI) == 0) {
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  // Check location.
+  DebugLoc DL = MI->getDebugLoc();
+  if (!DL.isUnknown()) {
+    const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+    PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+    PrevInstLoc = DL;
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  // If location is unknown then use temp label for this DBG_VALUE
+  // instruction.
+  if (MI->isDebugValue()) {
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  if (UnknownLocations) {
+    PrevLabel = recordSourceLine(0, 0, 0);
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  assert(0 && "Instruction is not processed!");
+}
+
+/// endScope - Process end of a scope.
+void DwarfDebug::endScope(const MachineInstr *MI) {
+  if (InsnsEndScopeSet.count(MI) != 0) {
+    // Emit a label if this instruction ends a scope.
+    MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(Label);
+    LabelsAfterInsn[MI] = Label;
+  }
+}
+
+/// getOrCreateDbgScope - Create DbgScope for the scope.
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
+                                          const MDNode *InlinedAt) {
+  if (!InlinedAt) {
+    DbgScope *WScope = DbgScopeMap.lookup(Scope);
+    if (WScope)
+      return WScope;
+    WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+    DbgScopeMap.insert(std::make_pair(Scope, WScope));
+    if (DIDescriptor(Scope).isLexicalBlock()) {
+      DbgScope *Parent =
+        getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
+      WScope->setParent(Parent);
+      Parent->addScope(WScope);
+    }
+
+    if (!WScope->getParent()) {
+      StringRef SPName = DISubprogram(Scope).getLinkageName();
+      // We used to check only for a linkage name, but that fails
+      // since we began omitting the linkage name for private
+      // functions. The new way is to check for the name in metadata,
+      // but that's not supported in old .ll test cases. Ergo, we
+      // check both.
+      if (SPName == Asm->MF->getFunction()->getName() ||
+          DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
+        CurrentFnDbgScope = WScope;
+    }
+
+    return WScope;
+  }
+
+  getOrCreateAbstractScope(Scope);
+  DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+  if (WScope)
+    return WScope;
+
+  WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+  DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+  DILocation DL(InlinedAt);
+  DbgScope *Parent =
+    getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
+  WScope->setParent(Parent);
+  Parent->addScope(WScope);
+
+  ConcreteScopes[InlinedAt] = WScope;
+
+  return WScope;
+}
+
+/// hasValidLocation - Return true if the debug location attached to the
+/// machine instruction encodes valid location info.
+static bool hasValidLocation(LLVMContext &Ctx,
+                             const MachineInstr *MInsn,
+                             const MDNode *&Scope, const MDNode *&InlinedAt) {
+  DebugLoc DL = MInsn->getDebugLoc();
+  if (DL.isUnknown()) return false;
+
+  const MDNode *S = DL.getScope(Ctx);
+
+  // There is no need to create another DIE for compile unit. For all
+  // other scopes, create one DbgScope now. This will be translated
+  // into a scope DIE at the end.
+  if (DIScope(S).isCompileUnit()) return false;
+
+  Scope = S;
+  InlinedAt = DL.getInlinedAt(Ctx);
+  return true;
+}
+
+/// calculateDominanceGraph - Calculate dominance graph for DbgScope
+/// hierarchy.
+static void calculateDominanceGraph(DbgScope *Scope) {
+  assert(Scope && "Unable to calculate scope dominance graph!");
+  SmallVector<DbgScope *, 4> WorkStack;
+  WorkStack.push_back(Scope);
+  unsigned Counter = 0;
+  while (!WorkStack.empty()) {
+    DbgScope *WS = WorkStack.back();
+    const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
+    bool visitedChildren = false;
+    for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+         SE = Children.end(); SI != SE; ++SI) {
+      DbgScope *ChildScope = *SI;
+      if (!ChildScope->getDFSOut()) {
+        WorkStack.push_back(ChildScope);
+        visitedChildren = true;
+        ChildScope->setDFSIn(++Counter);
+        break;
+      }
+    }
+    if (!visitedChildren) {
+      WorkStack.pop_back();
+      WS->setDFSOut(++Counter);
+    }
+  }
+}
+
+/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
+static
+void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+                       DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
+{
+#ifndef NDEBUG
+  unsigned PrevDFSIn = 0;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      const MDNode *Scope = NULL;
+      const MDNode *InlinedAt = NULL;
+
+      // Check if instruction has valid location information.
+      if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+        dbgs() << " [ ";
+        if (InlinedAt)
+          dbgs() << "*";
+        DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+          MI2ScopeMap.find(MInsn);
+        if (DI != MI2ScopeMap.end()) {
+          DbgScope *S = DI->second;
+          dbgs() << S->getDFSIn();
+          PrevDFSIn = S->getDFSIn();
+        } else
+          dbgs() << PrevDFSIn;
+      } else
+        dbgs() << " [ x" << PrevDFSIn;
+      dbgs() << " ]";
+      MInsn->dump();
+    }
+    dbgs() << "\n";
+  }
+#endif
+}
+
+/// extractScopeInformation - Scan machine instructions in this function
+/// and collect DbgScopes. Return true if at least one scope was found.
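+/// Instructions are grouped into DbgRanges keyed by their (scope,
+/// inlined-at) pair; e.g. a run of instructions in scope S followed by one
+/// in scope S' closes a range for S and opens one for S'. (A sketch; the
+/// exact splits depend on the DebugLocs attached by earlier passes.)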
+bool DwarfDebug::extractScopeInformation() {
+  // If scope information was extracted using .dbg intrinsics then there is
+  // no need to extract it again by scanning each instruction.
+  if (!DbgScopeMap.empty())
+    return false;
+
+  // Scan each instruction and create scopes. First build a working set of
+  // scopes.
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  SmallVector<DbgRange, 4> MIRanges;
+  DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
+  const MDNode *PrevScope = NULL;
+  const MDNode *PrevInlinedAt = NULL;
+  const MachineInstr *RangeBeginMI = NULL;
+  const MachineInstr *PrevMI = NULL;
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      const MDNode *Scope = NULL;
+      const MDNode *InlinedAt = NULL;
+
+      // Check if instruction has valid location information.
+      if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+        PrevMI = MInsn;
+        continue;
+      }
+
+      // If scope has not changed then skip this instruction.
+      if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+        PrevMI = MInsn;
+        continue;
+      }
+
+      if (RangeBeginMI) {
+        // If we have already seen the beginning of an instruction range and
+        // the current instruction scope does not match the scope of the first
+        // instruction in this range then create a new instruction range.
+        DbgRange R(RangeBeginMI, PrevMI);
+        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
+                                                        PrevInlinedAt);
+        MIRanges.push_back(R);
+      }
+
+      // This is a beginning of a new instruction range.
+      RangeBeginMI = MInsn;
+
+      // Reset previous markers.
+      PrevMI = MInsn;
+      PrevScope = Scope;
+      PrevInlinedAt = InlinedAt;
+    }
+  }
+
+  // Create last instruction range.
+  if (RangeBeginMI && PrevMI && PrevScope) {
+    DbgRange R(RangeBeginMI, PrevMI);
+    MIRanges.push_back(R);
+    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+  }
+
+  if (!CurrentFnDbgScope)
+    return false;
+
+  calculateDominanceGraph(CurrentFnDbgScope);
+  if (PrintDbgScope)
+    printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+
+  // Find ranges of instructions covered by each DbgScope.
+  DbgScope *PrevDbgScope = NULL;
+  for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
+       RE = MIRanges.end(); RI != RE; ++RI) {
+    const DbgRange &R = *RI;
+    DbgScope *S = MI2ScopeMap.lookup(R.first);
+    assert(S && "Lost DbgScope for a machine instruction!");
+    if (PrevDbgScope && !PrevDbgScope->dominates(S))
+      PrevDbgScope->closeInsnRange(S);
+    S->openInsnRange(R.first);
+    S->extendInsnRange(R.second);
+    PrevDbgScope = S;
+  }
+
+  if (PrevDbgScope)
+    PrevDbgScope->closeInsnRange();
+
+  identifyScopeMarkers();
+
+  return !DbgScopeMap.empty();
+}
+
+/// identifyScopeMarkers() -
+/// Each DbgScope has a first and a last instruction marking the beginning
+/// and the end of the scope, respectively. Create an inverse map that lists
+/// the scopes starting (and ending) at an instruction. One instruction may
+/// start (or end) multiple scopes. Ignore scopes that are not reachable.
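+/// Only the closing instruction of each range is recorded (in
+/// InsnsEndScopeSet); endScope() then emits a label right after any
+/// instruction found in that set.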
+void DwarfDebug::identifyScopeMarkers() {
+  SmallVector<DbgScope *, 4> WorkList;
+  WorkList.push_back(CurrentFnDbgScope);
+  while (!WorkList.empty()) {
+    DbgScope *S = WorkList.pop_back_val();
+
+    const SmallVector<DbgScope *, 4> &Children = S->getScopes();
+    if (!Children.empty())
+      for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+           SE = Children.end(); SI != SE; ++SI)
+        WorkList.push_back(*SI);
+
+    if (S->isAbstractScope())
+      continue;
+
+    const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
+    if (Ranges.empty())
+      continue;
+    for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+         RE = Ranges.end(); RI != RE; ++RI) {
+      assert(RI->first && "DbgRange does not have first instruction!");
+      assert(RI->second && "DbgRange does not have second instruction!");
+      InsnsEndScopeSet.insert(RI->second);
+    }
+  }
+}
+
+/// FindFirstDebugLoc - Find the first debug location in the function. This
+/// is intended to be an approximation for the source position of the
+/// beginning of the function.
+static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end();
+         MBBI != MBBE; ++MBBI) {
+      DebugLoc DL = MBBI->getDebugLoc();
+      if (!DL.isUnknown())
+        return DL;
+    }
+  return DebugLoc();
+}
+
+/// beginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+  if (!MMI->hasDebugInfo()) return;
+  if (!extractScopeInformation()) return;
+
+  FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+                                        Asm->getFunctionNumber());
+  // Assumes in correct section after the entry point.
+  Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+  // Emit label for the implicitly defined dbg.stoppoint at the start of the
+  // function.
+  DebugLoc FDL = FindFirstDebugLoc(MF);
+  if (FDL.isUnknown()) return;
+
+  const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
+  const MDNode *TheScope = 0;
+
+  DISubprogram SP = getDISubprogram(Scope);
+  unsigned Line, Col;
+  if (SP.Verify()) {
+    Line = SP.getLineNumber();
+    Col = 0;
+    TheScope = SP;
+  } else {
+    Line = FDL.getLine();
+    Col = FDL.getCol();
+    TheScope = Scope;
+  }
+
+  recordSourceLine(Line, Col, TheScope);
+
+  /// ProcessedArgs - Collection of arguments already processed.
+  SmallPtrSet<const MDNode *, 8> ProcessedArgs;
+
+  DebugLoc PrevLoc;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MI = II;
+      DebugLoc DL = MI->getDebugLoc();
+      if (MI->isDebugValue()) {
+        assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
+        DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
+        if (!DV.Verify()) continue;
+        // If DBG_VALUE is for a local variable then it needs a label.
+        if (DV.getTag() != dwarf::DW_TAG_arg_variable
+            && !isDbgValueInUndefinedReg(MI))
+          InsnNeedsLabel.insert(MI);
+        // A DBG_VALUE for an inlined function's argument needs a label.
+        else if (!DISubprogram(getDISubprogram(DV.getContext())).
+                 describes(MF->getFunction()))
+          InsnNeedsLabel.insert(MI);
+        // A DBG_VALUE indicating an argument location change needs a label.
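+        // (That is, a second DBG_VALUE in a defined register for an
+        // argument already present in ProcessedArgs.)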
+        else if (!isDbgValueInUndefinedReg(MI) && !ProcessedArgs.insert(DV))
+          InsnNeedsLabel.insert(MI);
+      } else {
+        // If location is unknown then instruction needs a location only if
+        // UnknownLocations flag is set.
+        if (DL.isUnknown()) {
+          if (UnknownLocations && !PrevLoc.isUnknown())
+            InsnNeedsLabel.insert(MI);
+        } else if (DL != PrevLoc)
+          // Otherwise, instruction needs a location only if it is a new
+          // location.
+          InsnNeedsLabel.insert(MI);
+      }
+
+      if (!DL.isUnknown() || UnknownLocations)
+        PrevLoc = DL;
+    }
+
+  PrevLabel = FunctionBeginSym;
+}
+
+/// endFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+  if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
+
+  if (CurrentFnDbgScope) {
+    // Define end label for subprogram.
+    FunctionEndSym = Asm->GetTempSymbol("func_end",
+                                        Asm->getFunctionNumber());
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(FunctionEndSym);
+
+    SmallPtrSet<const MDNode *, 16> ProcessedVars;
+    collectVariableInfo(MF, ProcessedVars);
+
+    // Get function line info.
+    if (!Lines.empty()) {
+      // Get section line info.
+      unsigned ID = SectionMap.insert(Asm->getCurrentSection());
+      if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+      std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+      // Append the function info to section info.
+      SectionLineInfos.insert(SectionLineInfos.end(),
+                              Lines.begin(), Lines.end());
+    }
+
+    // Construct abstract scopes.
+    for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+         AE = AbstractScopesList.end(); AI != AE; ++AI) {
+      DISubprogram SP((*AI)->getScopeNode());
+      if (SP.Verify()) {
+        // Collect info for variables that were optimized out.
+        StringRef FName = SP.getLinkageName();
+        if (FName.empty())
+          FName = SP.getName();
+        const Module *M = MF->getFunction()->getParent();
+        if (NamedMDNode *NMD =
+            M->getNamedMetadata(Twine("llvm.dbg.lv.",
+                                      getRealLinkageName(FName)))) {
+          for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+            DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+            if (!DV || !ProcessedVars.insert(DV))
+              continue;
+            DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
+            if (Scope)
+              Scope->addVariable(new DbgVariable(DV));
+          }
+        }
+      }
+      if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
+        constructScopeDIE(*AI);
+    }
+
+    DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
+
+    if (!DisableFramePointerElim(*MF))
+      addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+              dwarf::DW_FORM_flag, 1);
+
+    DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+                                                 MMI->getFrameMoves()));
+  }
+
+  // Clear debug info.
+  CurrentFnDbgScope = NULL;
+  InsnNeedsLabel.clear();
+  DbgVariableToFrameIndexMap.clear();
+  VarToAbstractVarMap.clear();
+  DbgVariableToDbgInstMap.clear();
+  DbgVariableLabelsMap.clear();
+  DeleteContainerSeconds(DbgScopeMap);
+  InsnsEndScopeSet.clear();
+  ConcreteScopes.clear();
+  DeleteContainerSeconds(AbstractScopes);
+  AbstractScopesList.clear();
+  AbstractVariables.clear();
+  LabelsBeforeInsn.clear();
+  LabelsAfterInsn.clear();
+  Lines.clear();
+  PrevLabel = NULL;
+}
+
+/// recordVariableFrameIndex - Record a variable's index.
+void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
+  assert(V && "Invalid DbgVariable!");
+  DbgVariableToFrameIndexMap[V] = Index;
+}
+
+/// findVariableFrameIndex - Return true if the frame index for the variable
+/// is found. Update FI to hold the value of the index.
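+/// Typical use (a sketch, mirroring constructVariableDIE above):
+///   int FI = 0;
+///   if (findVariableFrameIndex(DV, &FI))
+///     addVariableAddress(DV, VariableDie, FI);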
+bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
+  assert(V && "Invalid DbgVariable!");
+  DenseMap<const DbgVariable *, int>::iterator I =
+    DbgVariableToFrameIndexMap.find(V);
+  if (I == DbgVariableToFrameIndexMap.end())
+    return false;
+  *FI = I->second;
+  return true;
+}
+
+/// findVariableLabel - Find MCSymbol for the variable.
+const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
+  DenseMap<const DbgVariable *, const MCSymbol *>::iterator I
+    = DbgVariableLabelsMap.find(V);
+  if (I == DbgVariableLabelsMap.end())
+    return NULL;
+  return I->second;
+}
+
+/// findDbgScope - Find DbgScope for the debug loc attached with an
+/// instruction.
+DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
+  DbgScope *Scope = NULL;
+  LLVMContext &Ctx =
+    MInsn->getParent()->getParent()->getFunction()->getContext();
+  DebugLoc DL = MInsn->getDebugLoc();
+
+  if (DL.isUnknown())
+    return Scope;
+
+  if (const MDNode *IA = DL.getInlinedAt(Ctx))
+    Scope = ConcreteScopes.lookup(IA);
+  if (Scope == 0)
+    Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+
+  return Scope;
+}
+
+/// recordSourceLine - Register a source line with debug info. Returns the
+/// unique label that was emitted and which provides correspondence to
+/// the source line list.
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+                                       const MDNode *S) {
+  StringRef Dir;
+  StringRef Fn;
+
+  unsigned Src = 1;
+  if (S) {
+    DIDescriptor Scope(S);
+
+    if (Scope.isCompileUnit()) {
+      DICompileUnit CU(S);
+      Dir = CU.getDirectory();
+      Fn = CU.getFilename();
+    } else if (Scope.isSubprogram()) {
+      DISubprogram SP(S);
+      Dir = SP.getDirectory();
+      Fn = SP.getFilename();
+    } else if (Scope.isLexicalBlock()) {
+      DILexicalBlock DB(S);
+      Dir = DB.getDirectory();
+      Fn = DB.getFilename();
+    } else
+      assert(0 && "Unexpected scope info");
+
+    Src = GetOrCreateSourceID(Dir, Fn);
+  }
+
+  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+  Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
+
+  Asm->OutStreamer.EmitLabel(Label);
+  return Label;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
+///
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+  // Get the children.
+  const std::vector<DIE *> &Children = Die->getChildren();
+
+  // If not last sibling and has children then add sibling offset attribute.
+  if (!Last && !Children.empty())
+    Die->addSiblingOffset(DIEValueAllocator);
+
+  // Record the abbreviation.
+  assignAbbrevNumber(Die->getAbbrev());
+
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  // Set DIE offset.
+  Die->setOffset(Offset);
+
+  // Start the size with the size of abbreviation code.
+  Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Size the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    // Size attribute value.
+    Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+  // Size the DIE children if any.
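+  // (Each child is laid out at the running Offset; a single NUL abbreviation
+  // byte then terminates the sibling chain, hence the sizeof(int8_t) below.)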
+  if (!Children.empty()) {
+    assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+           "Children flag not set");
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+
+    // End of children marker.
+    Offset += sizeof(int8_t);
+  }
+
+  Die->setSize(Offset - Die->getOffset());
+  return Offset;
+}
+
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::computeSizeAndOffsets() {
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+       E = CUMap.end(); I != E; ++I) {
+    // Compute size of compile unit header. DIE offsets are relative to the
+    // start of their unit, so each unit starts sizing right after its header.
+    unsigned Offset =
+      sizeof(int32_t) + // Length of Compilation Unit Info
+      sizeof(int16_t) + // DWARF version number
+      sizeof(int32_t) + // Offset Into Abbrev. Section
+      sizeof(int8_t);   // Pointer Size (in bytes)
+    computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+  }
+}
+
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+                                const char *SymbolStem = 0) {
+  Asm->OutStreamer.SwitchSection(Section);
+  if (!SymbolStem) return 0;
+
+  MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+  Asm->OutStreamer.EmitLabel(TmpSym);
+  return TmpSym;
+}
+
+/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// the start of each one.
+void DwarfDebug::EmitSectionLabels() {
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  // Dwarf sections base addresses.
+  if (Asm->MAI->doesDwarfRequireFrameSection()) {
+    DwarfFrameSectionSym =
+      EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame");
+  }
+
+  DwarfInfoSectionSym =
+    EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+  DwarfAbbrevSectionSym =
+    EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+  EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+  if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+    EmitSectionSym(Asm, MacroInfo);
+
+  EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+  EmitSectionSym(Asm, TLOF.getDwarfLocSection());
+  EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+  EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+  DwarfStrSectionSym =
+    EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+  DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+                                             "debug_range");
+
+  DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+                                           "section_debug_loc");
+
+  TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+  EmitSectionSym(Asm, TLOF.getDataSection());
+}
+
+/// emitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::emitDIE(DIE *Die) {
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  // Emit the code (index) for the abbreviation.
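+  // (The abbreviation number is ULEB128-encoded, 7 bits per byte; e.g. 131
+  // would be emitted as the two bytes 0x83 0x01.)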
+  if (Asm->isVerbose())
+    Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+                                Twine::utohexstr(Die->getOffset()) + ":0x" +
+                                Twine::utohexstr(Die->getSize()) + " " +
+                                dwarf::TagString(Abbrev->getTag()));
+  Asm->EmitULEB128(AbbrevNumber);
+
+  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Emit the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    unsigned Attr = AbbrevData[i].getAttribute();
+    unsigned Form = AbbrevData[i].getForm();
+    assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+    if (Asm->isVerbose())
+      Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+    switch (Attr) {
+    case dwarf::DW_AT_sibling:
+      Asm->EmitInt32(Die->getSiblingOffset());
+      break;
+    case dwarf::DW_AT_abstract_origin: {
+      DIEEntry *E = cast<DIEEntry>(Values[i]);
+      DIE *Origin = E->getEntry();
+      unsigned Addr = Origin->getOffset();
+      Asm->EmitInt32(Addr);
+      break;
+    }
+    case dwarf::DW_AT_ranges: {
+      // The DW_AT_ranges value encodes an offset into the debug_range
+      // section.
+      DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+      if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+        Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+                                 V->getValue(),
+                                 4);
+      } else {
+        Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+                                       V->getValue(),
+                                       DwarfDebugRangeSectionSym,
+                                       4);
+      }
+      break;
+    }
+    case dwarf::DW_AT_location: {
+      if (UseDotDebugLocEntry.count(Die) != 0) {
+        DIELabel *L = cast<DIELabel>(Values[i]);
+        Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+      } else
+        Values[i]->EmitValue(Asm, Form);
+      break;
+    }
+    default:
+      // Emit an attribute using the defined form.
+      Values[i]->EmitValue(Asm, Form);
+      break;
+    }
+  }
+
+  // Emit the DIE children if any.
+  if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+    const std::vector<DIE *> &Children = Die->getChildren();
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      emitDIE(Children[j]);
+
+    if (Asm->isVerbose())
+      Asm->OutStreamer.AddComment("End Of Children Mark");
+    Asm->EmitInt8(0);
+  }
+}
+
+/// emitDebugInfo - Emit the debug info section.
+///
+void DwarfDebug::emitDebugInfo() {
+  // Start debug info section.
+  Asm->OutStreamer.SwitchSection(
+                            Asm->getObjFileLowering().getDwarfInfoSection());
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+       E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    DIE *Die = TheCU->getCUDie();
+
+    // Emit the compile unit header.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+                                                  TheCU->getID()));
+
+    // Emit the size of the content, not including the length field itself.
+    unsigned ContentSize = Die->getSize() +
+      sizeof(int16_t) + // DWARF version number
+      sizeof(int32_t) + // Offset Into Abbrev. Section
+      sizeof(int8_t) +  // Pointer Size (in bytes)
+      sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+
+    Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+    Asm->EmitInt32(ContentSize);
+    Asm->OutStreamer.AddComment("DWARF version number");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+    Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+                           DwarfAbbrevSectionSym);
+    Asm->OutStreamer.AddComment("Address Size (in bytes)");
+    Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+    emitDIE(Die);
+    // FIXME - extra padding for gdb bug.
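+    // (The four zero bytes below are already accounted for by the extra
+    // sizeof(int32_t) folded into ContentSize above.)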
+ Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); + } +} + +/// emitAbbreviations - Emit the abbreviation section. +/// +void DwarfDebug::emitAbbreviations() const { + // Check to see if it is worth the effort. + if (!Abbreviations.empty()) { + // Start the debug abbrev section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAbbrevSection()); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin")); + + // For each abbrevation. + for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) { + // Get abbreviation data + const DIEAbbrev *Abbrev = Abbreviations[i]; + + // Emit the abbrevations code (base 1 index.) + Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev->Emit(Asm); + } + + // Mark end of abbreviations. + Asm->EmitULEB128(0, "EOM(3)"); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end")); + } +} + +/// emitEndOfLineMatrix - Emit the last address of the section and the end of +/// the line matrix. +/// +void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { + // Define last address of section. + Asm->OutStreamer.AddComment("Extended Op"); + Asm->EmitInt8(0); + + Asm->OutStreamer.AddComment("Op size"); + Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); + Asm->OutStreamer.AddComment("DW_LNE_set_address"); + Asm->EmitInt8(dwarf::DW_LNE_set_address); + + Asm->OutStreamer.AddComment("Section end label"); + + Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), + Asm->getTargetData().getPointerSize(), + 0/*AddrSpace*/); + + // Mark end of matrix. + Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); + Asm->EmitInt8(0); + Asm->EmitInt8(1); + Asm->EmitInt8(1); +} + +/// emitDebugLines - Emit source line information. +/// +void DwarfDebug::emitDebugLines() { + // If the target is using .loc/.file, the assembler will be emitting the + // .debug_line table automatically. + if (Asm->MAI->hasDotLocAndDotFile()) + return; + + // Minimum line delta, thus ranging from -10..(255-10). + const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1); + // Maximum line delta, thus ranging from -10..(255-10). + const int MaxLineDelta = 255 + MinLineDelta; + + // Start the dwarf line section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLineSection()); + + // Construct the section header. 
+ Asm->OutStreamer.AddComment("Length of Source Line Info"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"), + Asm->GetTempSymbol("line_begin"), 4); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin")); + + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Prolog Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"), + Asm->GetTempSymbol("line_prolog_begin"), 4); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin")); + + Asm->OutStreamer.AddComment("Minimum Instruction Length"); + Asm->EmitInt8(1); + Asm->OutStreamer.AddComment("Default is_stmt_start flag"); + Asm->EmitInt8(1); + Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)"); + Asm->EmitInt8(MinLineDelta); + Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)"); + Asm->EmitInt8(MaxLineDelta); + Asm->OutStreamer.AddComment("Special Opcode Base"); + Asm->EmitInt8(-MinLineDelta); + + // Line number standard opcode encodings argument count + Asm->OutStreamer.AddComment("DW_LNS_copy arg count"); + Asm->EmitInt8(0); + Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count"); + Asm->EmitInt8(1); + Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count"); + Asm->EmitInt8(1); + Asm->OutStreamer.AddComment("DW_LNS_set_file arg count"); + Asm->EmitInt8(1); + Asm->OutStreamer.AddComment("DW_LNS_set_column arg count"); + Asm->EmitInt8(1); + Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count"); + Asm->EmitInt8(0); + Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count"); + Asm->EmitInt8(0); + Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count"); + Asm->EmitInt8(0); + Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count"); + Asm->EmitInt8(1); + + // Emit directories. + for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) { + const std::string &Dir = getSourceDirectoryName(DI); + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory"); + Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0); + } + + Asm->OutStreamer.AddComment("End of directories"); + Asm->EmitInt8(0); + + // Emit files. + for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) { + // Remember source id starts at 1. + std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI); + const std::string &FN = getSourceFileName(Id.second); + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source"); + Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0); + + Asm->EmitULEB128(Id.first, "Directory #"); + Asm->EmitULEB128(0, "Mod date"); + Asm->EmitULEB128(0, "File size"); + } + + Asm->OutStreamer.AddComment("End of files"); + Asm->EmitInt8(0); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end")); + + // A sequence for each text section. + unsigned SecSrcLinesSize = SectionSourceLines.size(); + + for (unsigned j = 0; j < SecSrcLinesSize; ++j) { + // Isolate current sections line info. + const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j]; + + // Dwarf assumes we start with first line of first source file. + unsigned Source = 1; + unsigned Line = 1; + + // Construct rows of the address, source, line, column matrix. + for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) { + const SrcLineInfo &LineInfo = LineInfos[i]; + MCSymbol *Label = LineInfo.getLabel(); + if (!Label->isDefined()) continue; // Not emitted, in dead code. 
+ + if (Asm->isVerbose()) { + std::pair<unsigned, unsigned> SrcID = + getSourceDirectoryAndFileIds(LineInfo.getSourceID()); + Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) + + "/" + + Twine(getSourceFileName(SrcID.second)) + + ":" + Twine(LineInfo.getLine())); + } + + // Define the line address. + Asm->OutStreamer.AddComment("Extended Op"); + Asm->EmitInt8(0); + Asm->OutStreamer.AddComment("Op size"); + Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); + + Asm->OutStreamer.AddComment("DW_LNE_set_address"); + Asm->EmitInt8(dwarf::DW_LNE_set_address); + + Asm->OutStreamer.AddComment("Location label"); + Asm->OutStreamer.EmitSymbolValue(Label, + Asm->getTargetData().getPointerSize(), + 0/*AddrSpace*/); + + // If change of source, then switch to the new source. + if (Source != LineInfo.getSourceID()) { + Source = LineInfo.getSourceID(); + Asm->OutStreamer.AddComment("DW_LNS_set_file"); + Asm->EmitInt8(dwarf::DW_LNS_set_file); + Asm->EmitULEB128(Source, "New Source"); + } + + // If change of line. + if (Line != LineInfo.getLine()) { + // Determine offset. + int Offset = LineInfo.getLine() - Line; + int Delta = Offset - MinLineDelta; + + // Update line. + Line = LineInfo.getLine(); + + // If delta is small enough and in range... + if (Delta >= 0 && Delta < (MaxLineDelta - 1)) { + // ... then use fast opcode. + Asm->OutStreamer.AddComment("Line Delta"); + Asm->EmitInt8(Delta - MinLineDelta); + } else { + // ... otherwise use long hand. + Asm->OutStreamer.AddComment("DW_LNS_advance_line"); + Asm->EmitInt8(dwarf::DW_LNS_advance_line); + Asm->EmitSLEB128(Offset, "Line Offset"); + Asm->OutStreamer.AddComment("DW_LNS_copy"); + Asm->EmitInt8(dwarf::DW_LNS_copy); + } + } else { + // Copy the previous row (different address or source) + Asm->OutStreamer.AddComment("DW_LNS_copy"); + Asm->EmitInt8(dwarf::DW_LNS_copy); + } + } + + emitEndOfLineMatrix(j + 1); + } + + if (SecSrcLinesSize == 0) + // Because we're emitting a debug_line section, we still need a line + // table. The linker and friends expect it to exist. If there's nothing to + // put into it, emit an empty table. + emitEndOfLineMatrix(1); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end")); +} + +/// emitCommonDebugFrame - Emit common frame info into a debug frame section. +/// +void DwarfDebug::emitCommonDebugFrame() { + if (!Asm->MAI->doesDwarfRequireFrameSection()) + return; + + int stackGrowth = Asm->getTargetData().getPointerSize(); + if (Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsDown) + stackGrowth *= -1; + + // Start the dwarf frame section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common")); + Asm->OutStreamer.AddComment("Length of Common Information Entry"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_frame_common_end"), + Asm->GetTempSymbol("debug_frame_common_begin"), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_begin")); + Asm->OutStreamer.AddComment("CIE Identifier Tag"); + Asm->EmitInt32((int)dwarf::DW_CIE_ID); + Asm->OutStreamer.AddComment("CIE Version"); + Asm->EmitInt8(dwarf::DW_CIE_VERSION); + Asm->OutStreamer.AddComment("CIE Augmentation"); + Asm->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); // nul terminator. 
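+  // (An empty augmentation string: just the NUL, no augmentation data
+  // follows.)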
+ Asm->EmitULEB128(1, "CIE Code Alignment Factor"); + Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); + Asm->OutStreamer.AddComment("CIE RA Column"); + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); + + std::vector<MachineMove> Moves; + RI->getInitialFrameState(Moves); + + Asm->EmitFrameMoves(Moves, 0, false); + + Asm->EmitAlignment(2); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end")); +} + +/// emitFunctionDebugFrame - Emit per function frame info into a debug frame +/// section. +void DwarfDebug:: +emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { + if (!Asm->MAI->doesDwarfRequireFrameSection()) + return; + + // Start the dwarf frame section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); + + Asm->OutStreamer.AddComment("Length of Frame Information Entry"); + MCSymbol *DebugFrameBegin = + Asm->GetTempSymbol("debug_frame_begin", DebugFrameInfo.Number); + MCSymbol *DebugFrameEnd = + Asm->GetTempSymbol("debug_frame_end", DebugFrameInfo.Number); + Asm->EmitLabelDifference(DebugFrameEnd, DebugFrameBegin, 4); + + Asm->OutStreamer.EmitLabel(DebugFrameBegin); + + Asm->OutStreamer.AddComment("FDE CIE offset"); + Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), + DwarfFrameSectionSym); + + Asm->OutStreamer.AddComment("FDE initial location"); + MCSymbol *FuncBeginSym = + Asm->GetTempSymbol("func_begin", DebugFrameInfo.Number); + Asm->OutStreamer.EmitSymbolValue(FuncBeginSym, + Asm->getTargetData().getPointerSize(), + 0/*AddrSpace*/); + + + Asm->OutStreamer.AddComment("FDE address range"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number), + FuncBeginSym, Asm->getTargetData().getPointerSize()); + + Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false); + + Asm->EmitAlignment(2); + Asm->OutStreamer.EmitLabel(DebugFrameEnd); +} + +/// emitDebugPubNames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubNames() { + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + // Start the dwarf pubnames section. 
+    Asm->OutStreamer.SwitchSection(
+                          Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+    Asm->OutStreamer.AddComment("Length of Public Names Info");
+    Asm->EmitLabelDifference(
+                     Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+                     Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+                                                  TheCU->getID()));
+
+    Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                           DwarfInfoSectionSym);
+
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                             4);
+
+    const StringMap<DIE*> &Globals = TheCU->getGlobals();
+    for (StringMap<DIE*>::const_iterator
+         GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE *Entity = GI->second;
+
+      Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+
+      if (Asm->isVerbose())
+        Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+    }
+
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+                                                  TheCU->getID()));
+  }
+}
+
+void DwarfDebug::emitDebugPubTypes() {
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+       E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    // Start the dwarf pubtypes section.
+    Asm->OutStreamer.SwitchSection(
+                          Asm->getObjFileLowering().getDwarfPubTypesSection());
+    Asm->OutStreamer.AddComment("Length of Public Types Info");
+    Asm->EmitLabelDifference(
+                     Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+                     Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+                                                  TheCU->getID()));
+
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                           DwarfInfoSectionSym);
+
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                             4);
+
+    const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+    for (StringMap<DIE*>::const_iterator
+         GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE *Entity = GI->second;
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+    }
+
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+                                                  TheCU->getID()));
+  }
+}
+
+/// emitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::emitDebugStr() {
+  // Check to see if it is worth the effort.
+  if (StringPool.empty()) return;
+
+  // Start the dwarf str section.
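+  // (Entries are sorted by pool index so the labels are emitted in the same
+  // order the DIEs handed them out.)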
+ Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfStrSection()); + + // Get all of the string pool entries and put them in an array by their ID so + // we can sort them. + SmallVector<std::pair<unsigned, + StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries; + + for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator + I = StringPool.begin(), E = StringPool.end(); I != E; ++I) + Entries.push_back(std::make_pair(I->second.second, &*I)); + + array_pod_sort(Entries.begin(), Entries.end()); + + for (unsigned i = 0, e = Entries.size(); i != e; ++i) { + // Emit a label for reference from debug information entries. + Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); + + // Emit the string itself. + Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); + } +} + +/// emitDebugLoc - Emit visible names into a debug loc section. +/// +void DwarfDebug::emitDebugLoc() { + if (DotDebugLocEntries.empty()) + return; + + // Start the dwarf loc section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + unsigned char Size = Asm->getTargetData().getPointerSize(); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); + unsigned index = 1; + for (SmallVector<DotDebugLocEntry, 4>::iterator + I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); + I != E; ++I, ++index) { + DotDebugLocEntry Entry = *I; + if (Entry.isEmpty()) { + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); + } else { + Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); + Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); + if (int Offset = Entry.Loc.getOffset()) { + // If the value is at a certain offset from frame register then + // use DW_OP_fbreg. + unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1; + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1 + OffsetSize); + Asm->OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); + Asm->EmitInt8(dwarf::DW_OP_fbreg); + Asm->OutStreamer.AddComment("Offset"); + Asm->EmitSLEB128(Offset); + } else { + if (Reg < 32) { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1); + Asm->OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); + Asm->EmitInt8(dwarf::DW_OP_regx); + Asm->EmitULEB128(Reg); + } + } + } + } +} + +/// EmitDebugARanges - Emit visible names into a debug aranges section. +/// +void DwarfDebug::EmitDebugARanges() { + // Start the dwarf aranges section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); +} + +/// emitDebugRanges - Emit visible names into a debug ranges section. +/// +void DwarfDebug::emitDebugRanges() { + // Start the dwarf ranges section. 
+ Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); + unsigned char Size = Asm->getTargetData().getPointerSize(); + for (SmallVector<const MCSymbol *, 8>::iterator + I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); + I != E; ++I) { + if (*I) + Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0); + else + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + } +} + +/// emitDebugMacInfo - Emit visible names into a debug macinfo section. +/// +void DwarfDebug::emitDebugMacInfo() { + if (const MCSection *LineInfo = + Asm->getObjFileLowering().getDwarfMacroInfoSection()) { + // Start the dwarf macinfo section. + Asm->OutStreamer.SwitchSection(LineInfo); + } +} + +/// emitDebugInlineInfo - Emit inline info using following format. +/// Section Header: +/// 1. length of section +/// 2. Dwarf version number +/// 3. address size. +/// +/// Entries (one "entry" for each function that was inlined): +/// +/// 1. offset into __debug_str section for MIPS linkage name, if exists; +/// otherwise offset into __debug_str for regular function name. +/// 2. offset into __debug_str section for regular function name. +/// 3. an unsigned LEB128 number indicating the number of distinct inlining +/// instances for the function. +/// +/// The rest of the entry consists of a {die_offset, low_pc} pair for each +/// inlined instance; the die_offset points to the inlined_subroutine die in the +/// __debug_info section, and the low_pc is the starting address for the +/// inlining instance. +void DwarfDebug::emitDebugInlineInfo() { + if (!Asm->MAI->doesDwarfUsesInlineInfoSection()) + return; + + if (!FirstCU) + return; + + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfDebugInlineSection()); + + Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1), + Asm->GetTempSymbol("debug_inlined_begin", 1), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1)); + + Asm->OutStreamer.AddComment("Dwarf Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + + for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(), + E = InlinedSPNodes.end(); I != E; ++I) { + + const MDNode *Node = *I; + DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II + = InlineInfo.find(Node); + SmallVector<InlineInfoLabels, 4> &Labels = II->second; + DISubprogram SP(Node); + StringRef LName = SP.getLinkageName(); + StringRef Name = SP.getName(); + + Asm->OutStreamer.AddComment("MIPS linkage name"); + if (LName.empty()) { + Asm->OutStreamer.EmitBytes(Name, 0); + Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator. 
+ } else + Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)), + DwarfStrSectionSym); + + Asm->OutStreamer.AddComment("Function name"); + Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym); + Asm->EmitULEB128(Labels.size(), "Inline count"); + + for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(), + LE = Labels.end(); LI != LE; ++LI) { + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(LI->second->getOffset()); + + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); + Asm->OutStreamer.EmitSymbolValue(LI->first, + Asm->getTargetData().getPointerSize(),0); + } + } + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1)); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h new file mode 100644 index 0000000..f0ff3bc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -0,0 +1,641 @@ +//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "DIE.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { + +class CompileUnit; +class DbgConcreteScope; +class DbgScope; +class DbgVariable; +class MachineFrameInfo; +class MachineModuleInfo; +class MachineOperand; +class MCAsmInfo; +class DIEAbbrev; +class DIE; +class DIEBlock; +class DIEEntry; + +class DIEnumerator; +class DIDescriptor; +class DIVariable; +class DIGlobal; +class DIGlobalVariable; +class DISubprogram; +class DIBasicType; +class DIDerivedType; +class DIType; +class DINameSpace; +class DISubrange; +class DICompositeType; + +//===----------------------------------------------------------------------===// +/// SrcLineInfo - This class is used to record source line correspondence. +/// +class SrcLineInfo { + unsigned Line; // Source line number. + unsigned Column; // Source column. + unsigned SourceID; // Source ID number. + MCSymbol *Label; // Label in code ID number. +public: + SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label) + : Line(L), Column(C), SourceID(S), Label(label) {} + + // Accessors + unsigned getLine() const { return Line; } + unsigned getColumn() const { return Column; } + unsigned getSourceID() const { return SourceID; } + MCSymbol *getLabel() const { return Label; } +}; + +class DwarfDebug { + /// Asm - Target of Dwarf emission. + AsmPrinter *Asm; + + /// MMI - Collected machine module information. + MachineModuleInfo *MMI; + + //===--------------------------------------------------------------------===// + // Attributes used to construct specific Dwarf sections. + // + + CompileUnit *FirstCU; + DenseMap <const MDNode *, CompileUnit *> CUMap; + + /// AbbreviationsSet - Used to uniquely define abbreviations. 
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// DirectoryIdMap - Directory name to directory id map.
+ ///
+ StringMap<unsigned> DirectoryIdMap;
+
+ /// DirectoryNames - A list of directory names.
+ SmallVector<std::string, 8> DirectoryNames;
+
+ /// SourceFileIdMap - Source file name to source file id map.
+ ///
+ StringMap<unsigned> SourceFileIdMap;
+
+ /// SourceFileNames - A list of source file names.
+ SmallVector<std::string, 8> SourceFileNames;
+
+ /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+ /// id mapped to a unique id.
+ DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
+
+ /// SourceIds - Reverse map from source id to directory id + file id pair.
+ ///
+ SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
+
+ /// Lines - List of source line correspondence.
+ std::vector<SrcLineInfo> Lines;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ /// StringPool - A String->Symbol mapping of strings used by indirect
+ /// references.
+ StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+ unsigned NextStringPoolNumber;
+
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<const MCSection*> SectionMap;
+
+ /// SectionSourceLines - Tracks line numbers per text section.
+ ///
+ std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+
+ // CurrentFnDbgScope - Top level scope for the current function.
+ //
+ DbgScope *CurrentFnDbgScope;
+
+ /// DbgScopeMap - Tracks the scopes in the current function. Owns the
+ /// contained DbgScope*s.
+ ///
+ DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
+
+ /// ConcreteScopes - Tracks the concrete scopes in the current function.
+ /// These scopes are also included in DbgScopeMap.
+ DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
+
+ /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
+ /// are not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
+ DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+
+ /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ /// AbstractScopesList - Tracks abstract scopes constructed while processing
+ /// a function. This list is cleared during endFunction().
+ SmallVector<DbgScope *, 4> AbstractScopesList;
+
+ /// AbstractVariables - Collection of abstract variables. Owned by the
+ /// DbgScopes in AbstractScopes.
+ DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+ /// DbgVariableToFrameIndexMap - Tracks frame index used to find
+ /// variable's value.
+ DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
+
+ /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
+ /// machine instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
+
+ /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
+ DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
+
+ /// DotDebugLocEntry - This struct describes location entries emitted in
+ /// .debug_loc section.
+ typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ DotDebugLocEntry() : Begin(0), End(0) {}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ MachineLocation &L) : Begin(B), End(E), Loc(L) {}
+ /// Empty entries are also used as a trigger to emit a temp label. Such
+ /// labels are used to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ } DotDebugLocEntry;
+
+ /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+ SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+ /// UseDotDebugLocEntry - The DW_AT_location attribute for each DIE in this
+ /// set identifies a corresponding .debug_loc entry offset.
+ SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
+
+ /// VarToAbstractVarMap - Maps a DbgVariable to its corresponding abstract
+ /// DbgVariable, if any.
+ DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
+
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+ typedef SmallVector<DbgScope *, 2> ScopeVector;
+
+ SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
+
+ /// InlineInfo - Keep track of inlined functions and their location. This
+ /// information is used to populate debug_inlined section.
+ typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+ // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+ // are processed to create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+ /// LabelsBeforeInsn - Maps an instruction to the label emitted before
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ /// LabelsAfterInsn - Maps an instruction to the label emitted after
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ /// InsnNeedsLabel - Collection of instructions that need a label to mark
+ /// a debugging information entity.
+ SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
+
+ SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+ /// Previous instruction's location information. This is used to determine
+ /// label location to indicate scope boundaries in dwarf debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ // Section Symbols: these are assembler temporary labels that are emitted at
+ // the beginning of each supported dwarf section. These are used to form
+ // section offsets and are created by EmitSectionLabels.
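+ //
+ // Illustrative sketch (mirroring the emitters in DwarfDebug.cpp, not a new
+ // API): a cross-section reference pairs a local label with the matching
+ // section symbol, e.g.
+ //
+ // Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ID),
+ // DwarfInfoSectionSym);
+ //
+ // (ID being a compile unit number) emits the offset of that label from the
+ // start of .debug_info.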
+ MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+ MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+ DIEInteger *DIEIntegerOne;
+private:
+
+ /// getSourceDirectoryAndFileIds - Return the directory and file ids that
+ /// map to the source id. Source id starts at 1.
+ std::pair<unsigned, unsigned>
+ getSourceDirectoryAndFileIds(unsigned SId) const {
+ return SourceIds[SId-1];
+ }
+
+ /// getNumSourceDirectories - Return the number of source directories in the
+ /// debug info.
+ unsigned getNumSourceDirectories() const {
+ return DirectoryNames.size();
+ }
+
+ /// getSourceDirectoryName - Return the name of the directory corresponding
+ /// to the id.
+ const std::string &getSourceDirectoryName(unsigned Id) const {
+ return DirectoryNames[Id - 1];
+ }
+
+ /// getSourceFileName - Return the name of the source file corresponding
+ /// to the id.
+ const std::string &getSourceFileName(unsigned Id) const {
+ return SourceFileNames[Id - 1];
+ }
+
+ /// getNumSourceIds - Return the number of unique source ids.
+ unsigned getNumSourceIds() const {
+ return SourceIds.size();
+ }
+
+ /// assignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addRegisterAddress - Add register location entry in variable DIE.
+ bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+
+ /// addConstantFPValue - Add constant value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+ /// on provided frame index.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity.
+ void addType(DIE *Entity, DIType Ty);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(DIType Ty);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(DIDerivedType DT);
+
+ /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
+
+ /// getOrCreateDbgScope - Create DbgScope for the scope.
+ DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
+
+ DbgScope *getOrCreateAbstractScope(const MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+
+ /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+ /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+ /// If there are global variables in this scope then create and insert
+ /// DIEs for these variables.
+ DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
+
+ /// constructLexicalScopeDIE - Construct new DW_TAG_lexical_block
+ /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(DbgScope *Scope);
+
+ /// constructInlinedScopeDIE - This scope represents inlined body of
+ /// a function. Construct DIE to represent this concrete inlined copy
+ /// of the function.
+ DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+
+ /// constructScopeDIE - Construct a DIE for this scope.
+ DIE *constructScopeDIE(DbgScope *Scope);
+
+ /// EmitSectionLabels - Emit initial Dwarf sections with a label at
+ /// the start of each one.
+ void EmitSectionLabels();
+
+ /// emitDIE - Recursively emits a debug information entry.
+ ///
+ void emitDIE(DIE *Die);
+
+ /// computeSizeAndOffset - Compute the size and offset of a DIE.
+ ///
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
+
+ /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void computeSizeAndOffsets();
+
+ /// emitDebugInfo - Emit the debug info section.
+ ///
+ void emitDebugInfo();
+
+ /// emitAbbreviations - Emit the abbreviation section.
+ ///
+ void emitAbbreviations() const;
+
+ /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// the line matrix.
+ ///
+ void emitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// emitDebugLines - Emit source line information.
+ ///
+ void emitDebugLines();
+
+ /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
+ ///
+ void emitCommonDebugFrame();
+
+ /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
+ /// section.
+ void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+
+ /// emitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void emitDebugPubNames();
+
+ /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+ ///
+ void emitDebugPubTypes();
+
+ /// emitDebugStr - Emit visible names into a debug str section.
+ ///
+ void emitDebugStr();
+
+ /// emitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void emitDebugLoc();
+
+ /// EmitDebugARanges - Emit visible names into a debug aranges section.
+ ///
+ void EmitDebugARanges();
+
+ /// emitDebugRanges - Emit visible names into a debug ranges section.
+ ///
+ void emitDebugRanges();
+
+ /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+ ///
+ void emitDebugMacInfo();
+
+ /// emitDebugInlineInfo - Emit inline info using following format.
+ /// Section Header:
+ /// 1. length of section
+ /// 2. Dwarf version number
+ /// 3. address size.
+ ///
+ /// Entries (one "entry" for each function that was inlined):
+ ///
+ /// 1. offset into __debug_str section for MIPS linkage name, if exists;
+ /// otherwise offset into __debug_str for regular function name.
+ /// 2. offset into __debug_str section for regular function name.
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// instances for the function.
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// inlined instance; the die_offset points to the inlined_subroutine die in
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
+ void emitDebugInlineInfo();
+
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert
+ /// it in the SourceIds map. This can update the DirectoryNames and
+ /// SourceFileNames maps as well.
+ unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
+
+ /// constructCompileUnit - Create new CompileUnit for the given
+ /// metadata node with tag DW_TAG_compile_unit.
+ void constructCompileUnit(const MDNode *N);
+
+ /// getCompileUnit - Get CompileUnit DIE.
+ CompileUnit *getCompileUnit(const MDNode *N) const;
+
+ /// constructGlobalVariableDIE - Construct global variable DIE.
+ void constructGlobalVariableDIE(const MDNode *N);
+
+ /// constructSubprogramDIE - Construct subprogram DIE.
+ void constructSubprogramDIE(const MDNode *N);
+
+ /// recordSourceLine - Register a source line with debug info. Returns the
+ /// unique label that was emitted and which provides correspondence to
+ /// the source line list.
+ MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
+
+ /// getSourceLineCount - Return the number of source lines in the debug
+ /// info.
+ unsigned getSourceLineCount() const {
+ return Lines.size();
+ }
+
+ /// recordVariableFrameIndex - Record a variable's index.
+ void recordVariableFrameIndex(const DbgVariable *V, int Index);
+
+ /// findVariableFrameIndex - Return true if frame index for the variable
+ /// is found. Update FI to hold value of the index.
+ bool findVariableFrameIndex(const DbgVariable *V, int *FI);
+
+ /// findVariableLabel - Find MCSymbol for the variable.
+ const MCSymbol *findVariableLabel(const DbgVariable *V);
+
+ /// findDbgScope - Find DbgScope for the debug loc attached to an
+ /// instruction.
+ DbgScope *findDbgScope(const MachineInstr *MI);
+
+ /// identifyScopeMarkers() - Identify instructions that mark the
+ /// beginning or end of a scope.
+ void identifyScopeMarkers();
+
+ /// extractScopeInformation - Scan machine instructions in this function
+ /// and collect DbgScopes. Return true, if at least one scope was found.
+ bool extractScopeInformation();
+
+ /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+ /// collectVariableInfoFromMMITable - Collect variable information from
+ /// side table maintained by MMI.
+ void collectVariableInfoFromMMITable(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &P);
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A, Module *M);
+ ~DwarfDebug();
+
+ /// beginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule(Module *M);
+
+ /// endModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void endModule();
+
+ /// beginFunction - Gather pre-function debug information. Assumes being
+ /// emitted immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF);
+
+ /// endFunction - Gather and emit post-function debug information.
+ ///
+ void endFunction(const MachineFunction *MF);
+
+ /// getLabelBeforeInsn - Return Label preceding the instruction.
+ const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// getLabelAfterInsn - Return Label immediately following the instruction.
+ const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+ /// beginScope - Process beginning of a scope.
+ void beginScope(const MachineInstr *MI);
+
+ /// endScope - Process end of a scope.
+ void endScope(const MachineInstr *MI); +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp new file mode 100644 index 0000000..86a3688 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -0,0 +1,960 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +DwarfException::DwarfException(AsmPrinter *A) + : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false), + shouldEmitTableModule(false), shouldEmitMovesModule(false) {} + +DwarfException::~DwarfException() {} + +/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that +/// is shared among many Frame Description Entries. There is at least one CIE +/// in every non-empty .debug_frame section. +void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { + // Size and sign of stack growth. + int stackGrowth = Asm->getTargetData().getPointerSize(); + if (Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsDown) + stackGrowth *= -1; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Begin eh frame section. + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + MCSymbol *EHFrameSym; + if (TLOF.isFunctionEHFrameSymbolPrivate()) + EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); + else + EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + + Twine(Index)); + Asm->OutStreamer.EmitLabel(EHFrameSym); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); + + // Define base labels. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); + + // Define the eh frame length. + Asm->OutStreamer.AddComment("Length of Common Information Entry"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), + Asm->GetTempSymbol("eh_frame_common_begin", Index), + 4); + + // EH frame header. 
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); + Asm->OutStreamer.AddComment("CIE Identifier Tag"); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + Asm->OutStreamer.AddComment("DW_CIE_VERSION"); + Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); + + // The personality presence indicates that language specific information will + // show up in the eh frame. Find out how we are supposed to lower the + // personality function reference: + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + char Augmentation[6] = { 0 }; + unsigned AugmentationSize = 0; + char *APtr = Augmentation + 1; + + if (PersonalityFn) { + // There is a personality function. + *APtr++ = 'P'; + AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); + } + + if (UsesLSDA[Index]) { + // An LSDA pointer is in the FDE augmentation. + *APtr++ = 'L'; + ++AugmentationSize; + } + + if (FDEEncoding != dwarf::DW_EH_PE_absptr) { + // A non-default pointer encoding for the FDE. + *APtr++ = 'R'; + ++AugmentationSize; + } + + if (APtr != Augmentation + 1) + Augmentation[0] = 'z'; + + Asm->OutStreamer.AddComment("CIE Augmentation"); + Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); + + // Round out reader. + Asm->EmitULEB128(1, "CIE Code Alignment Factor"); + Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); + Asm->OutStreamer.AddComment("CIE Return Address Column"); + + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); + + if (Augmentation[0]) { + Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); + + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + Asm->EmitEncodingByte(PerEncoding, "Personality"); + Asm->OutStreamer.AddComment("Personality"); + Asm->EmitReference(PersonalityFn, PerEncoding); + } + if (UsesLSDA[Index]) + Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + Asm->EmitEncodingByte(FDEEncoding, "FDE"); + } + + // Indicate locations of general callee saved registers in frame. + std::vector<MachineMove> Moves; + RI->getInitialFrameState(Moves); + Asm->EmitFrameMoves(Moves, 0, true); + + // On Darwin the linker honors the alignment of eh_frame, which means it must + // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get + // holes which confuse readers of eh_frame. + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); +} + +/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. +void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && + "Should not emit 'available externally' functions at all"); + + const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + // Externally visible entry into the functions eh frame info. If the + // corresponding function is static, this should not be externally visible. 
+ if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
+
+ // If the corresponding function is a weak definition, this should be too.
+ if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_WeakDefinition);
+
+ // If the corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ HiddenAttr);
+
+ // If there are no calls then you can't unwind. This may mean we can omit the
+ // EH Frame, but some environments do not handle weak absolute symbols. If
+ // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+ // info is to be available for non-EH uses.
+ if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
+ (!TheFunc->isWeakForLinker() ||
+ !Asm->MAI->getWeakDefDirective() ||
+ TLOF.getSupportsWeakOmittedEHFrame())) {
+ Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
+ MCConstantExpr::Create(0, Asm->OutContext));
+ // This name has no connection to the function, so it might get
+ // dead-stripped when the function is not, erroneously. Prohibit
+ // dead-stripping unconditionally.
+ if (Asm->MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ } else {
+ Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
+
+ // EH frame header.
+ Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
+ Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
+ EHFrameInfo.Number));
+
+ Asm->OutStreamer.AddComment("FDE CIE offset");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
+ Asm->GetTempSymbol("eh_frame_common",
+ EHFrameInfo.PersonalityIndex), 4);
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
+
+ Asm->OutStreamer.AddComment("FDE initial location");
+ Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
+
+ Asm->OutStreamer.AddComment("FDE address range");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
+ EHFrameInfo.Number),
+ EHFuncBeginSym,
+ Asm->GetSizeOfEncodedValue(FDEEncoding));
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (MMI->getPersonalities()[0] != NULL) {
+ unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
+
+ Asm->EmitULEB128(Size, "Augmentation size");
+ Asm->OutStreamer.AddComment("Language Specific Data Area");
+ if (EHFrameInfo.hasLandingPads)
+ Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
+ LSDAEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
+
+ } else {
+ Asm->EmitULEB128(0, "Augmentation size");
+ }
+
+ // Indicate locations of function specific callee saved registers in frame.
+ Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it
+ // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
+ // get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
+ EHFrameInfo.Number));
+
+ // If the function is marked used, this table should be also. We cannot
+ // make the mark unconditional in this case, since retaining the table also
+ // retains the function in this case, and there is code around that depends
+ // on unused functions (calling undefined externals) being dead-stripped to
+ // link correctly. Yes, there really is.
+ if (MMI->isUsedFunction(EHFrameInfo.function))
+ if (Asm->MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ }
+ Asm->OutStreamer.AddBlankLine();
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
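+ //
+ // A worked example with hypothetical values: if FilterIds = {1, 2, 3},
+ // each entry encodes in a single ULEB128 byte, so the loop below produces
+ // FilterOffsets = {-1, -2, -3} and a type ID of -2 is written as the byte
+ // offset FilterOffsets[1] = -2. Offsets and IDs only diverge once an entry
+ // needs more than one ULEB128 byte (values of 128 or more).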
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf exception handling (SjLj handling doesn't use this). If some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing &&
+ Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this.
+ if (PreviousIsInvoke &&
+ Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing &&
+ Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally, an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced when using DWARF exception handling.
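+ //
+ // Shape of the map built below, with hypothetical labels: a landing pad at
+ // index i with BeginLabels = {L0, L1} produces PadMap[L0] = {i, 0} and
+ // PadMap[L1] = {i, 1}, so each try-range begin label leads back to its pad
+ // and range index.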
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+ }
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getTargetData().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+  // Emit the LSDA.
+  MCSymbol *GCCETSym =
+    Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+                                      Twine(Asm->getFunctionNumber()));
+  Asm->OutStreamer.EmitLabel(GCCETSym);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+                                                Asm->getFunctionNumber()));
+
+  if (IsSJLJ)
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+                                                  Asm->getFunctionNumber()));
+
+  // Emit the LSDA header.
+  Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+  Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+  // The type infos need to be aligned. GCC does this by inserting padding just
+  // before the type infos. However, this changes the size of the exception
+  // table, so you need to take this into account when you output the exception
+  // table size. However, the size is output using a variable length encoding.
+  // So by increasing the size by inserting padding, you may increase the
+  // number of bytes used for writing the size. If it increases, say by one
+  // byte, then you now need to output one less byte of padding to get the type
+  // infos aligned. However this decreases the size of the exception table.
+  // This changes the value you have to output for the exception table size.
+  // Due to the variable length encoding, the number of bytes used for writing
+  // the length may decrease. If so, you then have to increase the amount of
+  // padding. And so on. If you look carefully at the GCC code you will see
+  // that it indeed does this in a loop, going on and on until the values
+  // stabilize. We chose another solution: don't output padding inside the
+  // table like GCC does; instead, output it before the table.
+  unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+  unsigned CallSiteTableLengthSize =
+    MCAsmInfo::getULEB128Size(CallSiteTableLength);
+  unsigned TTypeBaseOffset =
+    sizeof(int8_t) +                            // Call site format
+    CallSiteTableLengthSize +                   // Call site table length size
+    CallSiteTableLength +                       // Call site table length
+    SizeActions +                               // Actions size
+    SizeTypes;
+  unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+  unsigned TotalSize =
+    sizeof(int8_t) +                            // LPStart format
+    sizeof(int8_t) +                            // TType format
+    (HaveTTData ? TTypeBaseOffsetSize : 0) +    // TType base offset size
+    TTypeBaseOffset;                            // TType base offset
+  unsigned SizeAlign = (4 - TotalSize) & 3;
+
+  if (HaveTTData) {
+    // Account for any extra padding that will be added to the call site table
+    // length.
+    Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+    SizeAlign = 0;
+  }
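
// A worked instance (made-up sizes) of the alignment computation above,
// showing the pad-before-the-table trick that sidesteps GCC's fixed-point
// iteration described in the comment:
static unsigned toyLSDAPad() {
  unsigned CallSiteTableLength = 60, SizeActions = 20, SizeTypes = 40;
  unsigned TTypeBaseOffset =
    1 + 1 + CallSiteTableLength + SizeActions + SizeTypes;  // = 122
  unsigned TTypeBaseOffsetSize = 1;  // getULEB128Size(122): one byte.
  unsigned TotalSize = 1 + 1 + TTypeBaseOffsetSize + TTypeBaseOffset;  // = 125
  return (4 - TotalSize) & 3;  // = 3 pad bytes, appended to the @TType base
                               // offset field rather than squeezed in before
                               // the typeinfos.
}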
+  // SjLj Exception handling
+  if (IsSJLJ) {
+    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+    // Add extra padding if it wasn't added to the TType base offset.
+    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+    // Emit the landing pad site information.
+    unsigned idx = 0;
+    for (SmallVectorImpl<CallSiteEntry>::const_iterator
+         I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+      const CallSiteEntry &S = *I;
+
+      // Offset of the landing pad, counted in 16-byte bundles relative to the
+      // @LPStart address.
+      Asm->EmitULEB128(idx, "Landing pad");
+
+      // Offset of the first associated action record, relative to the start of
+      // the action table. This value is biased by 1 (1 indicates the start of
+      // the action table), and 0 indicates that there are no actions.
+      Asm->EmitULEB128(S.Action, "Action");
+    }
+  } else {
+    // DWARF Exception handling
+    assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+
+    // The call-site table is a list of all call sites that may throw an
+    // exception (including C++ 'throw' statements) in the procedure
+    // fragment. It immediately follows the LSDA header. Each entry indicates,
+    // for a given call, the corresponding landing pad and the first associated
+    // action record.
+    //
+    // The table begins with its length in bytes, stored as a LEB128-compressed
+    // unsigned integer. The records immediately follow, sorted by increasing
+    // call-site address. Each record indicates:
+    //
+    //   * The position of the call-site.
+    //   * The position of the landing pad.
+    //   * The first action record for that call site.
+    //
+    // A missing entry in the call-site table indicates that a call is not
+    // supposed to throw.
+
+    // Emit the landing pad call site table.
+    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+    // Add extra padding if it wasn't added to the TType base offset.
+    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+    for (SmallVectorImpl<CallSiteEntry>::const_iterator
+         I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+      const CallSiteEntry &S = *I;
+
+      MCSymbol *EHFuncBeginSym =
+        Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+      MCSymbol *BeginLabel = S.BeginLabel;
+      if (BeginLabel == 0)
+        BeginLabel = EHFuncBeginSym;
+      MCSymbol *EndLabel = S.EndLabel;
+      if (EndLabel == 0)
+        EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+      // Offset of the call site relative to the previous call site, counted in
+      // number of 16-byte bundles. The first call site is counted relative to
+      // the start of the procedure fragment.
+      Asm->OutStreamer.AddComment("Region start");
+      Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+
+      Asm->OutStreamer.AddComment("Region length");
+      Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+      // Offset of the landing pad, counted in 16-byte bundles relative to the
+      // @LPStart address.
+      Asm->OutStreamer.AddComment("Landing pad");
+      if (!S.PadLabel)
+        Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+      else
+        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+
+      // Offset of the first associated action record, relative to the start of
+      // the action table. This value is biased by 1 (1 indicates the start of
+      // the action table), and 0 indicates that there are no actions.
+      Asm->EmitULEB128(S.Action, "Action");
+    }
+  }
+
+  // Emit the Action Table.
+  if (Actions.size() != 0) {
+    Asm->OutStreamer.AddComment("-- Action Record Table --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+
+  for (SmallVectorImpl<ActionEntry>::const_iterator
+       I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+    const ActionEntry &Action = *I;
+    Asm->OutStreamer.AddComment("Action Record");
+    Asm->OutStreamer.AddBlankLine();
+
+    // Type Filter
+    //
+    // Used by the runtime to match the type of the thrown exception to the
+    // type of the catch clauses or the types in the exception specification.
+    Asm->EmitSLEB128(Action.ValueForTypeID, "  TypeInfo index");
+
+    // Action Record
+    //
+    // Self-relative signed displacement in bytes of the next action record,
+    // or 0 if there is no next action record.
+    Asm->EmitSLEB128(Action.NextAction, "  Next action");
+  }
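
// Hypothetical plain-struct views (not the LLVM API) of the two record kinds
// emitted above: a DWARF call-site record, three udata4 fields plus a ULEB128
// action index, and an action-table entry, a pair of SLEB128 values.
#include <stdint.h>

struct ToyCallSiteRecord {
  uint32_t RegionStart;   // Call-site offset from eh_func_begin (udata4).
  uint32_t RegionLength;  // EndLabel minus BeginLabel (udata4).
  uint32_t LandingPad;    // Pad offset from eh_func_begin; 0 = no pad (udata4).
  uint64_t ActionPlus1;   // ULEB128: 0 = no actions, else action index + 1.
};

struct ToyActionRecord {
  int64_t TypeFilter;     // SLEB128 type index; negative values are filters.
  int64_t NextAction;     // SLEB128 self-relative link; 0 terminates.
};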
+  // Emit the Catch TypeInfos.
+  if (!TypeInfos.empty()) {
+    Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+  for (std::vector<const GlobalVariable *>::const_reverse_iterator
+       I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+    const GlobalVariable *GV = *I;
+
+    Asm->OutStreamer.AddComment("TypeInfo");
+    if (GV)
+      Asm->EmitReference(GV, TTypeEncoding);
+    else
+      Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
+                                    0);
+  }
+
+  // Emit the Exception Specifications.
+  if (!FilterIds.empty()) {
+    Asm->OutStreamer.AddComment("-- Filter IDs --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+  for (std::vector<unsigned>::const_iterator
+       I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+    unsigned TypeID = *I;
+    Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+  }
+
+  Asm->EmitAlignment(2);
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+  if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
+    return;
+
+  if (!shouldEmitMovesModule && !shouldEmitTableModule)
+    return;
+
+  const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+
+  for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+    EmitCIE(Personalities[I], I);
+
+  for (std::vector<FunctionEHFrameInfo>::iterator
+       I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+    EmitFDE(*I);
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+  shouldEmitTable = shouldEmitMoves = false;
+
+  // If any landing pads survive, we need an EH table.
+  shouldEmitTable = !MMI->getLandingPads().empty();
+
+  // See if we need frame move info.
+  shouldEmitMoves =
+    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+  if (shouldEmitMoves || shouldEmitTable)
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+
+  shouldEmitTableModule |= shouldEmitTable;
+  shouldEmitMovesModule |= shouldEmitMoves;
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+  if (!shouldEmitMoves && !shouldEmitTable) return;
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                Asm->getFunctionNumber()));
+
+  // Record if this personality index uses a landing pad.
+  bool HasLandingPad = !MMI->getLandingPads().empty();
+  UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+
+  if (HasLandingPad)
+    EmitExceptionTable();
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  MCSymbol *FunctionEHSym =
+    Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
+                                      TLOF.isFunctionEHFrameSymbolPrivate());
+
+  // Save EH frame information.
+  EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym,
+                                         Asm->getFunctionNumber(),
+                                         MMI->getPersonalityIndex(),
+                                         Asm->MF->getFrameInfo()->adjustsStack(),
+                                         !MMI->getLandingPads().empty(),
+                                         MMI->getFrameMoves(),
+                                         Asm->MF->getFunction()));
+}
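
// A toy model (std::string in place of const GlobalVariable*) of the base-1,
// reverse-indexed type table emitted in EmitExceptionTable above: because the
// typeinfos are written out in reverse order, type ID N, read backward from
// the table base by the personality routine, resolves to the N-th typeinfo in
// source order.
#include <string>
#include <vector>

static const std::string &toyTypeInfoForID(
    const std::vector<std::string> &TypeInfos, unsigned ID) { // ID is 1-based.
  return TypeInfos[ID - 1];
}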
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 0000000..bc311e6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,191 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Function;
+class AsmPrinter;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException {
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  /// MMI - Collected machine module information.
+  MachineModuleInfo *MMI;
+
+  struct FunctionEHFrameInfo {
+    MCSymbol *FunctionEHSym;  // L_foo.eh
+    unsigned Number;
+    unsigned PersonalityIndex;
+    bool adjustsStack;
+    bool hasLandingPads;
+    std::vector<MachineMove> Moves;
+    const Function *function;
+
+    FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
+                        bool hC, bool hL,
+                        const std::vector<MachineMove> &M,
+                        const Function *f):
+      FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
+      adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
+  };
+
+  std::vector<FunctionEHFrameInfo> EHFrames;
+
+  /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+  /// uses an LSDA. If so, then we need to encode that information in the CIE's
+  /// augmentation.
+  DenseMap<unsigned, bool> UsesLSDA;
+
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+
+  /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+  /// should be emitted.
+  bool shouldEmitMovesModule;
+
+  /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+  /// that is shared among many Frame Description Entries. There is at least
+  /// one CIE in every non-empty .debug_frame section.
+  void EmitCIE(const Function *Personality, unsigned Index);
+
+  /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+  void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try. In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try. There is also a reference to the landing
+  ///     pad that handles the exception once processed. Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset. Starting with the action index from the
+  ///     landing pad site, each type id is checked for a match to the current
+  ///     exception. If it matches then the exception and type id are passed
+  ///     on to the landing pad. Otherwise the next action is looked up. This
+  ///     chain is terminated with a next action of zero. If no type id is
+  ///     found the frame is unwound and handling continues.
+  ///  3. The type id table contains references to all the C++ typeinfo for
+  ///     all catches in the function. This table is reverse indexed, base 1.
+
+  /// SharedTypeIds - How many leading type ids two landing pads have in common.
+  static unsigned SharedTypeIds(const LandingPadInfo *L,
+                                const LandingPadInfo *R);
+
+  /// PadLT - Order landing pads lexicographically by type id.
+  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+  /// PadRange - Structure holding a try-range and the associated landing pad.
+  struct PadRange {
+    // The index of the landing pad.
+    unsigned PadIndex;
+    // The index of the begin and end labels in the landing pad's label lists.
+    unsigned RangeIndex;
+  };
+
+  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+  /// ActionEntry - Structure describing an entry in the actions table.
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    unsigned Previous;
+  };
+
+  /// CallSiteEntry - Structure describing an entry in the call-site table.
+  struct CallSiteEntry {
+    // The 'try-range' is BeginLabel .. EndLabel.
+    MCSymbol *BeginLabel; // zero indicates the start of the function.
+    MCSymbol *EndLabel;   // zero indicates the end of the function.
+
+    // The landing pad starts at PadLabel.
+    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  /// ComputeActionsTable - Compute the actions table and gather the first
+  /// action index for each landing pad site.
+  unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+                               SmallVectorImpl<ActionEntry> &Actions,
+                               SmallVectorImpl<unsigned> &FirstActions);
+
+  /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+  /// marked `nounwind'. Return `false' otherwise.
+  bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+  /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+  /// invoke has a try-range containing the call, a non-zero landing pad and an
+  /// appropriate action. The entry for an ordinary call has a try-range
+  /// containing the call and zero for the landing pad and the action. Calls
+  /// marked 'nounwind' have no entry and must not be contained in the
+  /// try-range of any entry; they form gaps in the table. Entries must be
+  /// ordered by try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<unsigned> &FirstActions); + void EmitExceptionTable(); + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfException(AsmPrinter *A); + ~DwarfException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + void EndFunction(); +}; + +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Makefile b/contrib/llvm/lib/CodeGen/AsmPrinter/Makefile new file mode 100644 index 0000000..60aa6cb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Makefile @@ -0,0 +1,13 @@ +##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp new file mode 100644 index 0000000..c8a63cf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -0,0 +1,165 @@ +//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements printing the assembly code for an Ocaml frametable. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +namespace { + + class OcamlGCMetadataPrinter : public GCMetadataPrinter { + public: + void beginAssembly(AsmPrinter &AP); + void finishAssembly(AsmPrinter &AP); + }; + +} + +static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> +Y("ocaml", "ocaml 3.10-compatible collector"); + +void llvm::linkOcamlGCPrinter() { } + +static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { + const std::string &MId = M.getModuleIdentifier(); + + std::string SymName; + SymName += "caml"; + size_t Letter = SymName.size(); + SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); + SymName += "__"; + SymName += Id; + + // Capitalize the first letter of the module name. 
+  SymName[Letter] = toupper(SymName[Letter]);
+
+  SmallString<128> TmpStr;
+  AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+  MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+  AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+  AP.OutStreamer.EmitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+  EmitCamlGlobal(getModule(), AP, "code_begin");
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+  EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+///   extern "C" struct align(sizeof(intptr_t)) {
+///     uint16_t NumDescriptors;
+///     struct align(sizeof(intptr_t)) {
+///       void *ReturnAddress;
+///       uint16_t FrameSize;
+///       uint16_t NumLiveOffsets;
+///       uint16_t LiveOffsets[NumLiveOffsets];
+///     } Descriptors[NumDescriptors];
+///   } caml${module}__frametable;
+///
+/// Note that this precludes programs with stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). The frametable printer will
+/// abort if either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+  unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+  EmitCamlGlobal(getModule(), AP, "code_end");
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+  EmitCamlGlobal(getModule(), AP, "data_end");
+
+  // FIXME: Why does ocaml emit this??
+  AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+  EmitCamlGlobal(getModule(), AP, "frametable");
+
+  int NumDescriptors = 0;
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      NumDescriptors++;
+    }
+  }
+
+  if (NumDescriptors >= 1<<16) {
+    // Very rude!
+    report_fatal_error("Too many descriptors for ocaml GC");
+  }
+  AP.EmitInt16(NumDescriptors);
+  AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+
+    uint64_t FrameSize = FI.getFrameSize();
+    if (FrameSize >= 1<<16) {
+      // Very rude!
+      report_fatal_error("Function '" + FI.getFunction().getName() +
+                         "' is too large for the ocaml GC! "
+                         "Frame size " + Twine(FrameSize) + " >= 65536.\n"
+                         "(" + Twine(uintptr_t(&FI)) + ")");
+    }
+
+    AP.OutStreamer.AddComment("live roots for " +
+                              Twine(FI.getFunction().getName()));
+    AP.OutStreamer.AddBlankLine();
+
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      size_t LiveCount = FI.live_size(J);
+      if (LiveCount >= 1<<16) {
+        // Very rude!
+        report_fatal_error("Function '" + FI.getFunction().getName() +
+                           "' is too large for the ocaml GC! "
+                           "Live root count " + Twine(LiveCount) +
+                           " >= 65536.");
+      }
+
+      AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+      AP.EmitInt16(FrameSize);
+      AP.EmitInt16(LiveCount);
+
+      for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+                                         KE = FI.live_end(J); K != KE; ++K) {
+        if (K->StackOffset >= 1<<16) {
+          // Very rude!
+          report_fatal_error(
+            "GC root stack offset is outside of fixed stack frame and out "
+            "of range for ocaml GC!");
+        }
+        AP.EmitInt16(K->StackOffset);
+      }
+
+      AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+    }
+  }
+}
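
// A minimal sketch (hypothetical helper using std::string instead of the
// Mangler/MC layer) of the symbol-name construction performed by
// EmitCamlGlobal above: "caml" + the module basename up to the first '.',
// then "__" + Id, with the first letter of the module name capitalized.
#include <algorithm>
#include <cctype>
#include <string>

static std::string toyCamlSymbol(const std::string &ModuleId,
                                 const std::string &Id) {
  std::string Sym = "caml";
  size_t Letter = Sym.size();
  Sym.append(ModuleId.begin(),
             std::find(ModuleId.begin(), ModuleId.end(), '.'));
  Sym += "__";
  Sym += Id;
  // Capitalize the first letter of the module name.
  Sym[Letter] = static_cast<char>(toupper(Sym[Letter]));
  return Sym;
}
// e.g. toyCamlSymbol("foo.ml", "frametable") yields "camlFoo__frametable".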
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..cb81aa3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1339 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+                              cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+          cl::desc("Max number of predecessors to consider tail merging"),
+          cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+          cl::desc("Min number of instructions to consider tail merging"),
+          cl::init(3), cl::Hidden);
+
+namespace {
+  /// BranchFolderPass - Wrap branch folder in a machine function pass.
+  class BranchFolderPass : public MachineFunctionPass,
+                           public BranchFolder {
+  public:
+    static char ID;
+    explicit BranchFolderPass(bool defaultEnableTailMerge)
+      : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+  };
+}
+
+char BranchFolderPass::ID = 0;
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+  return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+  return OptimizeFunction(MF,
+                          MF.getTarget().getInstrInfo(),
+                          MF.getTarget().getRegisterInfo(),
+                          getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+  switch (FlagEnableTailMerge) {
+  case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+  case cl::BOU_TRUE:  EnableTailMerge = true;  break;
+  case cl::BOU_FALSE: EnableTailMerge = false; break;
+  }
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+  MachineFunction *MF = MBB->getParent();
+  // Drop all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // Remove the block.
+  MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+///   r0 = implicit_def
+///   r1 = implicit_def
+///   br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+  SmallSet<unsigned, 4> ImpDefRegs;
+  MachineBasicBlock::iterator I = MBB->begin();
+  while (I != MBB->end()) {
+    if (!I->isImplicitDef())
+      break;
+    unsigned Reg = I->getOperand(0).getReg();
+    ImpDefRegs.insert(Reg);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      ImpDefRegs.insert(SubReg);
+    ++I;
+  }
+  if (ImpDefRegs.empty())
+    return false;
+
+  MachineBasicBlock::iterator FirstTerm = I;
+  while (I != MBB->end()) {
+    if (!TII->isUnpredicatedTerminator(I))
+      return false;
+    // See if it uses any of the implicitly defined registers.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (ImpDefRegs.count(Reg))
+        return false;
+    }
+    ++I;
+  }
+
+  I = MBB->begin();
+  while (I != FirstTerm) {
+    MachineInstr *ImpDefMI = &*I;
+    ++I;
+    MBB->erase(ImpDefMI);
+  }
+
+  return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+                                    const TargetInstrInfo *tii,
+                                    const TargetRegisterInfo *tri,
+                                    MachineModuleInfo *mmi) {
+  if (!tii) return false;
+
+  TII = tii;
+  TRI = tri;
+  MMI = mmi;
+
+  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+  // Fix CFG. The later algorithms expect it to be right.
+  bool MadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+      MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+    MadeChange |= OptimizeImpDefsBlock(MBB);
+  }
+
+  bool MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = false;
+    MadeChangeThisIteration |= TailMergeBlocks(MF);
+    MadeChangeThisIteration |= OptimizeBranches(MF);
+    MadeChange |= MadeChangeThisIteration;
+  }
+
+  // See if any jump tables have become dead as the code generator
+  // did its thing.
+  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+  if (JTI == 0) {
+    delete RS;
+    return MadeChange;
+  }
+
+  // Walk the function to find jump tables that are live.
+  BitVector JTIsLive(JTI->getJumpTables().size());
+  for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+       BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I)
+      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+        MachineOperand &Op = I->getOperand(op);
+        if (!Op.isJTI()) continue;
+
+        // Remember that this JT is live.
+        JTIsLive.set(Op.getIndex());
+      }
+  }
+
+  // Finally, remove dead jump tables. This happens when the
+  // indirect jump was unreachable (and thus deleted).
+  for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+    if (!JTIsLive.test(i)) {
+      JTI->RemoveJumpTable(i);
+      MadeChange = true;
+    }
+
+  delete RS;
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+  unsigned Hash = MI->getOpcode();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+
+    // Merge in bits from the operand if easy.
+    unsigned OperandHash = 0;
+    switch (Op.getType()) {
+    case MachineOperand::MO_Register:  OperandHash = Op.getReg(); break;
+    case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+    case MachineOperand::MO_MachineBasicBlock:
+      OperandHash = Op.getMBB()->getNumber();
+      break;
+    case MachineOperand::MO_FrameIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      OperandHash = Op.getIndex();
+      break;
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_ExternalSymbol:
+      // Global address / external symbol are too hard, don't bother, but do
+      // pull in the offset.
+      OperandHash = Op.getOffset();
+      break;
+    default: break;
+    }
+
+    Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+  }
+  return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
+  MachineBasicBlock::const_iterator I = MBB->end();
+  if (I == MBB->begin())
+    return 0;   // Empty MBB.
+
+  --I;
+  // Skip debug info so it will not affect codegen.
+  while (I->isDebugValue()) {
+    if (I==MBB->begin())
+      return 0;      // MBB empty except for debug info.
+    --I;
+  }
+
+  return HashMachineInstr(I);
+}
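
// A toy illustration (std::string in place of MachineInstr) of the common-
// tail computation defined next: walk two sequences backward and count how
// many trailing elements compare equal. Blocks whose last instructions hash
// together via HashEndOfMBB above are then tested pairwise in this way.
#include <algorithm>
#include <string>
#include <vector>

static unsigned toyCommonTailLength(const std::vector<std::string> &A,
                                    const std::vector<std::string> &B) {
  std::pair<std::vector<std::string>::const_reverse_iterator,
            std::vector<std::string>::const_reverse_iterator> M =
    std::mismatch(A.rbegin(), A.rbegin() + std::min(A.size(), B.size()),
                  B.rbegin());
  return static_cast<unsigned>(M.first - A.rbegin());
}
// e.g. {"w","add","ret"} and {"x","y","add","ret"} share a tail of length 2.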
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the
+/// number of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+                                        MachineBasicBlock *MBB2,
+                                        MachineBasicBlock::iterator &I1,
+                                        MachineBasicBlock::iterator &I2) {
+  I1 = MBB1->end();
+  I2 = MBB2->end();
+
+  unsigned TailLen = 0;
+  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+    --I1; --I2;
+    // Skip debugging pseudos; necessary to avoid changing the code.
+    while (I1->isDebugValue()) {
+      if (I1==MBB1->begin()) {
+        while (I2->isDebugValue()) {
+          if (I2==MBB2->begin())
+            // I1==DBG at begin; I2==DBG at begin
+            return TailLen;
+          --I2;
+        }
+        ++I2;
+        // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+        return TailLen;
+      }
+      --I1;
+    }
+    // I1==first (untested) non-DBG preceding known match
+    while (I2->isDebugValue()) {
+      if (I2==MBB2->begin()) {
+        ++I1;
+        // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+        return TailLen;
+      }
+      --I2;
+    }
+    // I1, I2==first (untested) non-DBGs preceding known match
+    if (!I1->isIdenticalTo(I2) ||
+        // FIXME: This check is dubious. It's used to get around a problem where
+        // people incorrectly expect inline asm directives to remain in the same
+        // relative order. This is untenable because normal compiler
+        // optimizations (like this one) may reorder and/or merge these
+        // directives.
+        I1->isInlineAsm()) {
+      ++I1; ++I2;
+      break;
+    }
+    ++TailLen;
+  }
+  // Back past possible debugging pseudos at beginning of block. This matters
+  // when one block differs from the other only by whether debugging pseudos
+  // are present at the beginning. (This way, the various checks later for
+  // I1==MBB1->begin() work as expected.)
+  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+    --I2;
+    while (I2->isDebugValue()) {
+      if (I2 == MBB2->begin()) {
+        return TailLen;
+      }
+      --I2;
+    }
+    ++I2;
+  }
+  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+    --I1;
+    while (I1->isDebugValue()) {
+      if (I1 == MBB1->begin())
+        return TailLen;
+      --I1;
+    }
+    ++I1;
+  }
+  return TailLen;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                           MachineBasicBlock *NewDest) {
+  TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+  ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the portion before the iterator falls through into the portion
+/// starting at the iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+                                            MachineBasicBlock::iterator BBI1) {
+  if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+    return 0;
+
+  MachineFunction &MF = *CurMBB.getParent();
+
+  // Create the fall-through block.
+  MachineFunction::iterator MBBI = &CurMBB;
+  MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+  CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+  // Move all the successors of this block to the specified block.
+  NewMBB->transferSuccessors(&CurMBB);
+
+  // Add an edge from CurMBB to NewMBB for the fall-through.
+  CurMBB.addSuccessor(NewMBB);
+
+  // Splice the code over.
+  NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+  // For targets that use the register scavenger, we must maintain LiveIns.
+ if (RS) { + RS->enterBasicBlock(&CurMBB); + if (!CurMBB.empty()) + RS->forward(prior(CurMBB.end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } + + return NewMBB; +} + +/// EstimateRuntime - Make a rough estimate for how long it will take to run +/// the specified code. +static unsigned EstimateRuntime(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + unsigned Time = 0; + for (; I != E; ++I) { + if (I->isDebugValue()) + continue; + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isCall()) + Time += 10; + else if (TID.mayLoad() || TID.mayStore()) + Time += 2; + else + ++Time; + } + return Time; +} + +// CurMBB needs to add an unconditional branch to SuccMBB (we removed these +// branches temporarily for tail merging). In the case where CurMBB ends +// with a conditional branch to the next block, optimize by reversing the +// test and conditionally branching to SuccMBB instead. +static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, + const TargetInstrInfo *TII) { + MachineFunction *MF = CurMBB->getParent(); + MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere + if (I != MF->end() && + !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { + MachineBasicBlock *NextBB = I; + if (TBB == NextBB && !Cond.empty() && !FBB) { + if (!TII->ReverseBranchCondition(Cond)) { + TII->RemoveBranch(*CurMBB); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); + return; + } + } + } + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector<MachineOperand, 0>(), dl); +} + +bool +BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { + if (getHash() < o.getHash()) + return true; + else if (getHash() > o.getHash()) + return false; + else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + return true; + else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + return false; + else { + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. +#ifndef _GLIBCXX_DEBUG + llvm_unreachable("Predecessor appears twice"); +#endif + return false; + } +} + +/// CountTerminators - Count the number of terminators in the given +/// block and set I to the position of the first non-terminator, if there +/// is one, or MBB->end() otherwise. +static unsigned CountTerminators(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &I) { + I = MBB->end(); + unsigned NumTerms = 0; + for (;;) { + if (I == MBB->begin()) { + I = MBB->end(); + break; + } + --I; + if (!I->getDesc().isTerminator()) break; + ++NumTerms; + } + return NumTerms; +} + +/// ProfitableToMerge - Check if two machine basic blocks have a common tail +/// and decide if it would be profitable to merge those tails. Return the +/// length of the common tail and iterators to the first common instruction +/// in each block. 
+static bool ProfitableToMerge(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + unsigned minCommonTailLength, + unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); + MachineFunction *MF = MBB1->getParent(); + + if (CommonTailLen == 0) + return false; + + // It's almost always profitable to merge any number of non-terminator + // instructions with the block that falls through into the common successor. + if (MBB1 == PredBB || MBB2 == PredBB) { + MachineBasicBlock::iterator I; + unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); + if (CommonTailLen > NumTerms) + return true; + } + + // If one of the blocks can be completely merged and happens to be in + // a position where the other could fall through into it, merge any number + // of instructions, because it can be done without a branch. + // TODO: If the blocks are not adjacent, move one of them so that they are? + if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + return true; + if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + return true; + + // If both blocks have an unconditional branch temporarily stripped out, + // count that as an additional common instruction for the following + // heuristics. + unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + !MBB1->back().getDesc().isBarrier() && + !MBB2->back().getDesc().isBarrier()) + ++EffectiveTailLen; + + // Check if the common tail is long enough to be worthwhile. + if (EffectiveTailLen >= minCommonTailLength) + return true; + + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + if (EffectiveTailLen >= 2 && + MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + (I1 == MBB1->begin() || I2 == MBB2->begin())) + return true; + + return false; +} + +/// ComputeSameTails - Look through all the blocks in MergePotentials that have +/// hash CurHash (guaranteed to match the last element). Build the vector +/// SameTails of all those that have the (same) largest number of instructions +/// in common of any pair of these blocks. SameTails entries contain an +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// instruction where the matching code sequence begins. +/// Order of elements in SameTails is the reverse of the order in which +/// those blocks appear in MergePotentials (where they are not necessarily +/// consecutive). 
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + unsigned maxCommonTailLength = 0U; + SameTails.clear(); + MachineBasicBlock::iterator TrialBBI1, TrialBBI2; + MPIterator HighestMPIter = prior(MergePotentials.end()); + for (MPIterator CurMPIter = prior(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter != B && CurMPIter->getHash() == CurHash; + --CurMPIter) { + for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { + unsigned CommonTailLen; + if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), + minCommonTailLength, + CommonTailLen, TrialBBI1, TrialBBI2, + SuccBB, PredBB)) { + if (CommonTailLen > maxCommonTailLength) { + SameTails.clear(); + maxCommonTailLength = CommonTailLen; + HighestMPIter = CurMPIter; + SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); + } + if (HighestMPIter == CurMPIter && + CommonTailLen == maxCommonTailLength) + SameTails.push_back(SameTailElt(I, TrialBBI2)); + } + if (I == B) + break; + } + } + return maxCommonTailLength; +} + +/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from +/// MergePotentials, restoring branches at ends of blocks as appropriate. +void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + MPIterator CurMPIter, B; + for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; + --CurMPIter) { + // Put the unconditional branch back, if we need one. + MachineBasicBlock *CurMBB = CurMPIter->getBlock(); + if (SuccBB && CurMBB != PredBB) + FixTail(CurMBB, SuccBB, TII); + if (CurMPIter == B) + break; + } + if (CurMPIter->getHash() != CurHash) + CurMPIter++; + MergePotentials.erase(CurMPIter, MergePotentials.end()); +} + +/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist +/// only of the common tail. Create a block that does by splitting one. +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; + unsigned TimeEstimate = ~0U; + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + // Use PredBB if possible; that doesn't require a new branch. + if (SameTails[i].getBlock() == PredBB) { + commonTailIndex = i; + break; + } + // Otherwise, make a (fairly bogus) choice based on estimate of + // how long it will take the various blocks to execute. + unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), + SameTails[i].getTailStartPos()); + if (t <= TimeEstimate) { + TimeEstimate = t; + commonTailIndex = i; + } + } + + MachineBasicBlock::iterator BBI = + SameTails[commonTailIndex].getTailStartPos(); + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // If the common tail includes any debug info we will take it pretty + // randomly from one of the inputs. Might be better to remove it? + DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + << maxCommonTailLength); + + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + + SameTails[commonTailIndex].setBlock(newMBB); + SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); + + // If we split PredBB, newMBB is the new predecessor. 
+ if (PredBB == MBB) + PredBB = newMBB; + + return true; +} + +// See if any of the blocks in MergePotentials (which all have a common single +// successor, or all have no successor) can be tail-merged. If there is a +// successor, any blocks in MergePotentials that are not tail-merged and +// are not immediately before Succ must have an unconditional branch to +// Succ added (but the predecessor/successor lists need no adjustment). +// The lone predecessor of Succ that falls through into Succ, +// if any, is given in PredBB. + +bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + bool MadeChange = false; + + // Except for the special cases below, tail-merge if there are at least + // this many instructions in common. + unsigned minCommonTailLength = TailMergeSize; + + DEBUG(dbgs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", "); + dbgs() << "\n"; + if (SuccBB) { + dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + if (PredBB) + dbgs() << " which has fall-through from BB#" + << PredBB->getNumber() << "\n"; + } + dbgs() << "Looking for common tails of at least " + << minCommonTailLength << " instruction" + << (minCommonTailLength == 1 ? "" : "s") << '\n'; + ); + + // Sort by hash value so that blocks with identical end sequences sort + // together. + std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + + // Walk through equivalence sets looking for actual exact matches. + while (MergePotentials.size() > 1) { + unsigned CurHash = MergePotentials.back().getHash(); + + // Build SameTails, identifying the set of blocks with this hash code + // and with the maximum number of instructions in common. + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength, + SuccBB, PredBB); + + // If we didn't find any pair that has at least minCommonTailLength + // instructions in common, remove all blocks with this hash code and retry. + if (SameTails.empty()) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + + // If one of the blocks is the entire common tail (and not the entry + // block, which we can't jump to), we can treat all blocks with this same + // tail at once. Use PredBB if that is one of the possibilities, as that + // will not introduce any extra branches. + MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> + getParent()->begin(); + unsigned commonTailIndex = SameTails.size(); + // If there are two blocks, check to see if one can be made to fall through + // into the other. + if (SameTails.size() == 2 && + SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && + SameTails[1].tailIsWholeBlock()) + commonTailIndex = 1; + else if (SameTails.size() == 2 && + SameTails[1].getBlock()->isLayoutSuccessor( + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock()) + commonTailIndex = 0; + else { + // Otherwise just pick one, favoring the fall-through predecessor if + // there is one. 
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + MachineBasicBlock *MBB = SameTails[i].getBlock(); + if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + continue; + if (MBB == PredBB) { + commonTailIndex = i; + break; + } + if (SameTails[i].tailIsWholeBlock()) + commonTailIndex = i; + } + } + + if (commonTailIndex == SameTails.size() || + (SameTails[commonTailIndex].getBlock() == PredBB && + !SameTails[commonTailIndex].tailIsWholeBlock())) { + // None of the blocks consist entirely of the common tail. + // Split a block so that one does. + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + } + + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + // MBB is common tail. Adjust all other BB's to jump to this one. + // Traversal must be forwards so erases work. + DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() + << " for "); + for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { + if (commonTailIndex == i) + continue; + DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", ")); + // Hack the end off BB i, making it jump to BB commonTailIndex instead. + ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); + // BB i is no longer a predecessor of SuccBB; remove it from the worklist. + MergePotentials.erase(SameTails[i].getMPIter()); + } + DEBUG(dbgs() << "\n"); + // We leave commonTailIndex in the worklist in case there are other blocks + // that match it with a smaller number of instructions. + MadeChange = true; + } + return MadeChange; +} + +bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { + + if (!EnableTailMerge) return false; + + bool MadeChange = false; + + // First find blocks with no successors. + MergePotentials.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + if (I->succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); + } + + // See if we can do any tail merging on those. + if (MergePotentials.size() < TailMergeThreshold && + MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(NULL, NULL); + + // Look at blocks (IBB) with multiple predecessors (PBB). + // We change each predecessor to a canonical form, by + // (1) temporarily removing any unconditional branch from the predecessor + // to IBB, and + // (2) alter conditional branches so they branch to the other block + // not IBB; this may require adding back an unconditional branch to IBB + // later, where there wasn't one coming in. E.g. + // Bcc IBB + // fallthrough to QBB + // here becomes + // Bncc QBB + // with a conceptual B to IBB after that, which never actually exists. + // With those changes, we see whether the predecessors' tails match, + // and merge them if so. We change things out of canonical form and + // back to the way they were later in the process. (OptimizeBranches + // would undo some of this, but we can't use it, because we'd get into + // a compile-time infinite loop repeatedly doing and undoing the same + // transformations.) 
+ + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ++I) { + if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2; ++P) { + MachineBasicBlock *PBB = *P; + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and + // we cannot reverse the branch. + SmallVector<MachineOperand, 4> NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = llvm::next(MachineFunction::iterator(PBB)); + } + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice + // to have a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) + PredNextBB = IP; + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr + continue; + } + } + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + } + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + } + } + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + // Reinsert an unconditional branch if needed. + // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); + } + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Branch Optimization +//===----------------------------------------------------------------------===// + +bool BranchFolder::OptimizeBranches(MachineFunction &MF) { + bool MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + MadeChange |= OptimizeBlock(MBB); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + +// Blocks should be considered empty if they contain only debug info; +// else the debug info would affect codegen. 
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+  if (MBB->empty())
+    return true;
+  for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+       MBBI!=MBBE; ++MBBI) {
+    if (!MBBI->isDebugValue())
+      return false;
+  }
+  return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator MBBI, MBBE;
+  for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
+    if (!MBBI->isDebugValue())
+      break;
+  }
+  return (MBBI->getDesc().isBranch());
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// would result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+                                MachineBasicBlock *MBB2) {
+  // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+  // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+  // optimize branches that branch to either a return block or an assert block
+  // into a fallthrough to the return.
+  if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+
+  // If there is a clear successor ordering, we make sure that one block
+  // will fall through to the next.
+  if (MBB1->isSuccessor(MBB2)) return true;
+  if (MBB2->isSuccessor(MBB1)) return false;
+
+  // Neither block consists entirely of debug info (per the IsEmptyBlock
+  // check), so we needn't test for falling off the beginning here.
+  MachineBasicBlock::iterator MBB1I = --MBB1->end();
+  while (MBB1I->isDebugValue())
+    --MBB1I;
+  MachineBasicBlock::iterator MBB2I = --MBB2->end();
+  while (MBB2I->isDebugValue())
+    --MBB2I;
+  return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+  bool MadeChange = false;
+  MachineFunction &MF = *MBB->getParent();
+  DebugLoc dl;  // FIXME: this is nowhere
+ReoptimizeBlock:
+
+  MachineFunction::iterator FallThrough = MBB;
+  ++FallThrough;
+
+  // If this block is empty, make everyone use its fall-through, not the block
+  // explicitly. Landing pads should not do this since the landing-pad table
+  // points to this block. Blocks with their addresses taken shouldn't be
+  // optimized away.
+  if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+    // Dead block? Leave for cleanup later.
+    if (MBB->pred_empty()) return MadeChange;
+
+    if (FallThrough == MF.end()) {
+      // TODO: Simplify preds to not branch here if possible!
+    } else {
+      // Rewrite all predecessors of the old block to go to the fallthrough
+      // instead.
+      while (!MBB->pred_empty()) {
+        MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+        Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+      }
+      // If MBB was the target of a jump table, update jump tables to go to the
+      // fallthrough instead.
+      if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+        MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+      MadeChange = true;
+    }
+    return MadeChange;
+  }
+
+  // Check to see if we can simplify the terminator of the block before this
+  // one.
+  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+  SmallVector<MachineOperand, 4> PriorCond;
+  bool PriorUnAnalyzable =
+    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+  if (!PriorUnAnalyzable) {
+    // If the CFG for the prior block has extra edges, remove them.
+    MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+                                              !PriorCond.empty());
+
+    // If the previous branch is conditional and both conditions go to the same
+    // destination, remove the branch, replacing it with an unconditional one or
+    // a fall-through.
+    if (PriorTBB && PriorTBB == PriorFBB) {
+      TII->RemoveBranch(PrevBB);
+      PriorCond.clear();
+      if (PriorTBB != MBB)
+        TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+      MadeChange = true;
+      ++NumBranchOpts;
+      goto ReoptimizeBlock;
+    }
+
+    // If the previous block unconditionally falls through to this block and
+    // this block has no other predecessors, move the contents of this block
+    // into the prior block. This doesn't usually happen when SimplifyCFG
+    // has been used, but it can happen if tail merging splits a fall-through
+    // predecessor of a block.
+    // This has to check PrevBB.succ_size() because EH edges are ignored by
+    // AnalyzeBranch.
+    if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+        PrevBB.succ_size() == 1 &&
+        !MBB->hasAddressTaken()) {
+      DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+                   << "From MBB: " << *MBB);
+      PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+      PrevBB.removeSuccessor(PrevBB.succ_begin());
+      assert(PrevBB.succ_empty());
+      PrevBB.transferSuccessors(MBB);
+      MadeChange = true;
+      return MadeChange;
+    }
+
+    // If the previous branch *only* branches to *this* block (conditional or
+    // not) remove the branch.
+    if (PriorTBB == MBB && PriorFBB == 0) {
+      TII->RemoveBranch(PrevBB);
+      MadeChange = true;
+      ++NumBranchOpts;
+      goto ReoptimizeBlock;
+    }
+
+    // If the prior block branches somewhere else on the condition and here if
+    // the condition is false, remove the uncond second branch.
+    if (PriorFBB == MBB) {
+      TII->RemoveBranch(PrevBB);
+      TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+      MadeChange = true;
+      ++NumBranchOpts;
+      goto ReoptimizeBlock;
+    }
+
+    // If the prior block branches here on true and somewhere else on false, and
+    // if the branch condition is reversible, reverse the branch to create a
+    // fall-through.
+    if (PriorTBB == MBB) {
+      SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+      if (!TII->ReverseBranchCondition(NewPriorCond)) {
+        TII->RemoveBranch(PrevBB);
+        TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
+        MadeChange = true;
+        ++NumBranchOpts;
+        goto ReoptimizeBlock;
+      }
+    }
+
+    // If this block has no successors (e.g. it is a return block or ends with
+    // a call to a no-return function like abort or __cxa_throw) and if the pred
+    // falls through into this block, and if it would otherwise fall through
+    // into the block after this, move this block to the end of the function.
+    //
+    // We consider it more likely that execution will stay in the function (e.g.
+    // due to loops) than it is to exit it. This helps with asserts in loops
+    // etc., moving the assert condition out of the loop body.
+    if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+        MachineFunction::iterator(PriorTBB) == FallThrough &&
+        !MBB->canFallThrough()) {
+      bool DoTransform = true;
+
+      // We have to be careful that the succs of PredBB aren't both no-successor
+      // blocks.
If neither has successors and if PredBB is the second from
+      // last block in the function, we'd just keep swapping the two blocks for
+      // last. Only do the swap if one is clearly better to fall through than
+      // the other.
+      if (FallThrough == --MF.end() &&
+          !IsBetterFallthrough(PriorTBB, MBB))
+        DoTransform = false;
+
+      if (DoTransform) {
+        // Reverse the branch so we will fall through on the previous true cond.
+        SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+        if (!TII->ReverseBranchCondition(NewPriorCond)) {
+          DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+                       << "To make fallthrough to: " << *PriorTBB << "\n");
+
+          TII->RemoveBranch(PrevBB);
+          TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
+
+          // Move this block to the end of the function.
+          MBB->moveAfter(--MF.end());
+          MadeChange = true;
+          ++NumBranchOpts;
+          return MadeChange;
+        }
+      }
+    }
+  }
+
+  // Analyze the branch in the current block.
+  MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+  SmallVector<MachineOperand, 4> CurCond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+  if (!CurUnAnalyzable) {
+    // If the CFG for this block has extra edges, remove them.
+    MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+    // If this is a two-way branch, and the FBB branches to this block, reverse
+    // the condition so the single-basic-block loop is faster. Instead of:
+    //    Loop: xxx; jcc Out; jmp Loop
+    // we want:
+    //    Loop: xxx; jncc Loop; jmp Out
+    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+      SmallVector<MachineOperand, 4> NewCond(CurCond);
+      if (!TII->ReverseBranchCondition(NewCond)) {
+        TII->RemoveBranch(*MBB);
+        TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+        MadeChange = true;
+        ++NumBranchOpts;
+        goto ReoptimizeBlock;
+      }
+    }
+
+    // If this branch is the only thing in its block, see if we can forward
+    // other blocks across it.
+    if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+        IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+        !MBB->hasAddressTaken()) {
+      // This block may contain just an unconditional branch. Because there can
+      // be 'non-branch terminators' in the block, try removing the branch and
+      // then seeing if the block is empty.
+      TII->RemoveBranch(*MBB);
+      // If the only things remaining in the block are debug info, remove these
+      // as well, so this will behave the same as an empty block in non-debug
+      // mode.
+      if (!MBB->empty()) {
+        bool NonDebugInfoFound = false;
+        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+             I != E; ++I) {
+          if (!I->isDebugValue()) {
+            NonDebugInfoFound = true;
+            break;
+          }
+        }
+        if (!NonDebugInfoFound)
+          // Make the block empty, losing the debug info (we could probably
+          // improve this in some cases).
+          MBB->erase(MBB->begin(), MBB->end());
+      }
+      // If this block is just an unconditional branch to CurTBB, we can
+      // usually completely eliminate the block. The only case we cannot
+      // completely eliminate the block is when the block before this one
+      // falls through into MBB and we can't understand the prior block's branch
+      // condition.
+      if (MBB->empty()) {
+        bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+        if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+            !PrevBB.isSuccessor(MBB)) {
+          // If the prior block falls through into us, turn it into an
+          // explicit branch to us to make updates simpler.
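+          // (Turning the fallthrough into an explicit branch means the
+          // ReplaceUsesOfBlockWith calls below can retarget every predecessor
+          // uniformly, without a special case for the layout predecessor.)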
+          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+              PriorTBB != MBB && PriorFBB != MBB) {
+            if (PriorTBB == 0) {
+              assert(PriorCond.empty() && PriorFBB == 0 &&
+                     "Bad branch analysis");
+              PriorTBB = MBB;
+            } else {
+              assert(PriorFBB == 0 && "Machine CFG out of date!");
+              PriorFBB = MBB;
+            }
+            TII->RemoveBranch(PrevBB);
+            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+          }
+
+          // Iterate through all the predecessors, revectoring each in turn.
+          size_t PI = 0;
+          bool DidChange = false;
+          bool HasBranchToSelf = false;
+          while (PI != MBB->pred_size()) {
+            MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+            if (PMBB == MBB) {
+              // If this block has an uncond branch to itself, leave it.
+              ++PI;
+              HasBranchToSelf = true;
+            } else {
+              DidChange = true;
+              PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+              // If this change resulted in PMBB ending in a conditional
+              // branch where both conditions go to the same destination,
+              // change this to an unconditional branch (and fix the CFG).
+              MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+              SmallVector<MachineOperand, 4> NewCurCond;
+              bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+                      NewCurFBB, NewCurCond, true);
+              if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+                TII->RemoveBranch(*PMBB);
+                NewCurCond.clear();
+                TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+                MadeChange = true;
+                ++NumBranchOpts;
+                PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+              }
+            }
+          }
+
+          // Change any jumptables to go to the new MBB.
+          if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+            MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+          if (DidChange) {
+            ++NumBranchOpts;
+            MadeChange = true;
+            if (!HasBranchToSelf) return MadeChange;
+          }
+        }
+      }
+
+      // Add the branch back if the block is more than just an uncond branch.
+      TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
+    }
+  }
+
+  // If the prior block doesn't fall through into this block, and if this
+  // block doesn't fall through into some other block, see if we can find a
+  // place to move this block where a fall-through will happen.
+  if (!PrevBB.canFallThrough()) {
+
+    // Now we know that there was no fall-through into this block, check to
+    // see if it has a fall-through into its successor.
+    bool CurFallsThru = MBB->canFallThrough();
+
+    if (!MBB->isLandingPad()) {
+      // Check all the predecessors of this block. If one of them has no fall
+      // throughs, move this block right after it.
+      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+           E = MBB->pred_end(); PI != E; ++PI) {
+        // Analyze the branch at the end of the pred.
+        MachineBasicBlock *PredBB = *PI;
+        MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+        MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+        SmallVector<MachineOperand, 4> PredCond;
+        if (PredBB != MBB && !PredBB->canFallThrough() &&
+            !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
+            && (!CurFallsThru || !CurTBB || !CurFBB)
+            && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+          // If the current block doesn't fall through, just move it.
+          // If the current block can fall through and does not end with a
+          // conditional branch, we need to append an unconditional jump to
+          // the (current) next block. To avoid a possible compile-time
+          // infinite loop, move blocks only backward in this case.
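+          // (Moving only backward keeps the block numbers of moved blocks
+          // strictly decreasing, so the goto ReoptimizeBlock retry cannot
+          // cycle through the same layouts forever.)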
+ // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: + if (CurFallsThru) { + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + CurCond.clear(); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + } + MBB->moveAfter(PredBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + } + + if (!CurFallsThru) { + // Check all successors to see if we can move this block before it. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + // Analyze the branch at the end of the block before the succ. + MachineBasicBlock *SuccBB = *SI; + MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + + // If this block doesn't already fall-through to that successor, and if + // the succ doesn't already have a block that can fall through into it, + // and if the successor isn't an EH destination, we can arrange for the + // fallthrough to happen. + if (SuccBB != MBB && &*SuccPrev != MBB && + !SuccPrev->canFallThrough() && !CurUnAnalyzable && + !SuccBB->isLandingPad()) { + MBB->moveBefore(SuccBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + + // Okay, there is no really great place to put this block. If, however, + // the block before this one would be a fall-through if this block were + // removed, move this block to the end of the function. + MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; + SmallVector<MachineOperand, 4> PrevCond; + if (FallThrough != MF.end() && + !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && + PrevBB.isSuccessor(FallThrough)) { + MBB->moveAfter(--MF.end()); + MadeChange = true; + return MadeChange; + } + } + } + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h new file mode 100644 index 0000000..15dfa7f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -0,0 +1,116 @@ +//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP +#define LLVM_CODEGEN_BRANCHFOLDING_HPP + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include <vector> + +namespace llvm { + class MachineFunction; + class MachineModuleInfo; + class RegScavenger; + class TargetInstrInfo; + class TargetRegisterInfo; + template<typename T> class SmallVectorImpl; + + class BranchFolder { + public: + explicit BranchFolder(bool defaultEnableTailMerge); + + bool OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi); + private: + class MergePotentialsElt { + unsigned Hash; + MachineBasicBlock *Block; + public: + MergePotentialsElt(unsigned h, MachineBasicBlock *b) + : Hash(h), Block(b) {} + + unsigned getHash() const { return Hash; } + MachineBasicBlock *getBlock() const { return Block; } + + void setBlock(MachineBasicBlock *MBB) { + Block = MBB; + } + + bool operator<(const MergePotentialsElt &) const; + }; + typedef std::vector<MergePotentialsElt>::iterator MPIterator; + std::vector<MergePotentialsElt> MergePotentials; + + class SameTailElt { + MPIterator MPIter; + MachineBasicBlock::iterator TailStartPos; + public: + SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) + : MPIter(mp), TailStartPos(tsp) {} + + MPIterator getMPIter() const { + return MPIter; + } + MergePotentialsElt &getMergePotentialsElt() const { + return *getMPIter(); + } + MachineBasicBlock::iterator getTailStartPos() const { + return TailStartPos; + } + unsigned getHash() const { + return getMergePotentialsElt().getHash(); + } + MachineBasicBlock *getBlock() const { + return getMergePotentialsElt().getBlock(); + } + bool tailIsWholeBlock() const { + return TailStartPos == getBlock()->begin(); + } + + void setBlock(MachineBasicBlock *MBB) { + getMergePotentialsElt().setBlock(MBB); + } + void setTailStartPos(MachineBasicBlock::iterator Pos) { + TailStartPos = Pos; + } + }; + std::vector<SameTailElt> SameTails; + + bool EnableTailMerge; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineModuleInfo *MMI; + RegScavenger *RS; + + bool TailMergeBlocks(MachineFunction &MF); + bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB); + void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex); + + bool OptimizeBranches(MachineFunction &MF); + bool OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + }; +} + +#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/contrib/llvm/lib/CodeGen/CMakeLists.txt b/contrib/llvm/lib/CodeGen/CMakeLists.txt new file mode 100644 index 0000000..2ef115d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CMakeLists.txt @@ -0,0 +1,86 @@ +add_llvm_library(LLVMCodeGen + AggressiveAntiDepBreaker.cpp + Analysis.cpp + BranchFolding.cpp + CalcSpillWeights.cpp + CallingConvLower.cpp + CodePlacementOpt.cpp + CriticalAntiDepBreaker.cpp + 
DeadMachineInstructionElim.cpp + DwarfEHPrepare.cpp + ELFCodeEmitter.cpp + ELFWriter.cpp + GCMetadata.cpp + GCMetadataPrinter.cpp + GCStrategy.cpp + IfConversion.cpp + InlineSpiller.cpp + IntrinsicLowering.cpp + LLVMTargetMachine.cpp + LatencyPriorityQueue.cpp + LiveInterval.cpp + LiveIntervalAnalysis.cpp + LiveStackAnalysis.cpp + LiveVariables.cpp + LocalStackSlotAllocation.cpp + LowerSubregs.cpp + MachineBasicBlock.cpp + MachineCSE.cpp + MachineDominators.cpp + MachineFunction.cpp + MachineFunctionAnalysis.cpp + MachineFunctionPass.cpp + MachineFunctionPrinterPass.cpp + MachineInstr.cpp + MachineLICM.cpp + MachineLoopInfo.cpp + MachineModuleInfo.cpp + MachineModuleInfoImpls.cpp + MachinePassRegistry.cpp + MachineRegisterInfo.cpp + MachineSSAUpdater.cpp + MachineSink.cpp + MachineVerifier.cpp + ObjectCodeEmitter.cpp + OcamlGC.cpp + OptimizePHIs.cpp + PHIElimination.cpp + Passes.cpp + PeepholeOptimizer.cpp + PostRAHazardRecognizer.cpp + PostRASchedulerList.cpp + PreAllocSplitting.cpp + ProcessImplicitDefs.cpp + PrologEpilogInserter.cpp + PseudoSourceValue.cpp + RegAllocFast.cpp + RegAllocLinearScan.cpp + RegAllocPBQP.cpp + RegisterCoalescer.cpp + RegisterScavenging.cpp + RenderMachineFunction.cpp + ScheduleDAG.cpp + ScheduleDAGEmit.cpp + ScheduleDAGInstrs.cpp + ScheduleDAGPrinter.cpp + ShadowStackGC.cpp + ShrinkWrapping.cpp + SimpleRegisterCoalescing.cpp + SjLjEHPrepare.cpp + SlotIndexes.cpp + Spiller.cpp + SplitKit.cpp + Splitter.cpp + StackProtector.cpp + StackSlotColoring.cpp + StrongPHIElimination.cpp + TailDuplication.cpp + TargetInstrInfoImpl.cpp + TargetLoweringObjectFileImpl.cpp + TwoAddressInstructionPass.cpp + UnreachableBlockElim.cpp + VirtRegMap.cpp + VirtRegRewriter.cpp + ) + +target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts) diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp new file mode 100644 index 0000000..1b7e08a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -0,0 +1,224 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
+                "Calculate spill weights", false, false);
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+  au.addRequired<LiveIntervals>();
+  au.addRequired<MachineLoopInfo>();
+  au.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+  DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+               << "********** Function: "
+               << fn.getFunction()->getName() << '\n');
+
+  LiveIntervals &lis = getAnalysis<LiveIntervals>();
+  VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+  for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+    LiveInterval &li = *I->second;
+    if (TargetRegisterInfo::isVirtualRegister(li.reg))
+      vrai.CalculateWeightAndHint(li);
+  }
+  return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+                         const TargetRegisterInfo &tri,
+                         const MachineRegisterInfo &mri) {
+  unsigned sub, hreg, hsub;
+  if (mi->getOperand(0).getReg() == reg) {
+    sub = mi->getOperand(0).getSubReg();
+    hreg = mi->getOperand(1).getReg();
+    hsub = mi->getOperand(1).getSubReg();
+  } else {
+    sub = mi->getOperand(1).getSubReg();
+    hreg = mi->getOperand(0).getReg();
+    hsub = mi->getOperand(0).getSubReg();
+  }
+
+  if (!hreg)
+    return 0;
+
+  if (TargetRegisterInfo::isVirtualRegister(hreg))
+    return sub == hsub ? hreg : 0;
+
+  const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+  // Only allow physreg hints in rc.
+  if (sub == 0)
+    return rc->contains(hreg) ? hreg : 0;
+
+  // reg:sub should match the physreg hreg.
+  return tri.getMatchingSuperReg(hreg, sub, rc);
+}
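+
+// Compute a spill weight for li by summing a per-access weight over all of
+// the instructions that touch it. (getSpillWeight scales each access by its
+// loop depth, so uses inside deep loops dominate the total.)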
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+  MachineRegisterInfo &mri = mf_.getRegInfo();
+  const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+  MachineBasicBlock *mbb = 0;
+  MachineLoop *loop = 0;
+  unsigned loopDepth = 0;
+  bool isExiting = false;
+  float totalWeight = 0;
+  SmallPtrSet<MachineInstr*, 8> visited;
+
+  // Find the best physreg hint and the best virtreg hint.
+  float bestPhys = 0, bestVirt = 0;
+  unsigned hintPhys = 0, hintVirt = 0;
+
+  // Don't recompute a target specific hint.
+  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+  for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+       MachineInstr *mi = I.skipInstruction();) {
+    if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+      continue;
+    if (!visited.insert(mi))
+      continue;
+
+    // Get loop info for mi.
+    if (mi->getParent() != mbb) {
+      mbb = mi->getParent();
+      loop = loops_.getLoopFor(mbb);
+      loopDepth = loop ? loop->getLoopDepth() : 0;
+      isExiting = loop ? loop->isLoopExiting(mbb) : false;
+    }
+
+    // Calculate instr weight.
+    bool reads, writes;
+    tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+    float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+    // Give extra weight to what looks like a loop induction variable update.
+    if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
+      weight *= 3;
+
+    totalWeight += weight;
+
+    // Get allocation hints from copies.
+    if (noHint || !mi->isCopy())
+      continue;
+    unsigned hint = copyHint(mi, li.reg, tri, mri);
+    if (!hint)
+      continue;
+    float hweight = hint_[hint] += weight;
+    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+      if (hweight > bestPhys && lis_.isAllocatable(hint))
+        bestPhys = hweight, hintPhys = hint;
+    } else {
+      if (hweight > bestVirt)
+        bestVirt = hweight, hintVirt = hint;
+    }
+  }
+
+  hint_.clear();
+
+  // Always prefer the physreg hint.
+  if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+    mri.setRegAllocationHint(li.reg, 0, hint);
+    // Weakly boost the spill weight of hinted registers.
+    totalWeight *= 1.01F;
+  }
+
+  // Mark li as unspillable if all live ranges are tiny.
+  if (li.isZeroLength()) {
+    li.markNotSpillable();
+    return;
+  }
+
+  // If all of the definitions of the interval are re-materializable,
+  // it is a preferred candidate for spilling. If none of the defs are
+  // loads, then it's potentially very cheap to re-materialize.
+  // FIXME: this gets much more complicated once we support non-trivial
+  // re-materialization.
+  bool isLoad = false;
+  SmallVector<LiveInterval*, 4> spillIs;
+  if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+    if (isLoad)
+      totalWeight *= 0.9F;
+    else
+      totalWeight *= 0.5F;
+  }
+
+  li.weight = totalWeight;
+  lis_.normalizeSpillWeight(li);
+}
+
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+  MachineRegisterInfo &mri = mf_.getRegInfo();
+  const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
+  const TargetRegisterClass *orc = mri.getRegClass(reg);
+  SmallPtrSet<const TargetRegisterClass*,8> rcs;
+
+  for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
+       E = mri.reg_nodbg_end(); I != E; ++I) {
+    // The targets don't have accurate enough regclass descriptions for us to
+    // handle subregs. We need something similar to
+    // TRI::getMatchingSuperRegClass, but returning a super class instead of a
+    // sub class.
+    if (I.getOperand().getSubReg()) {
+      DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
+      return;
+    }
+    if (const TargetRegisterClass *rc =
+                                I->getDesc().getRegClass(I.getOperandNo(), tri))
+      rcs.insert(rc);
+  }
+
+  // If we found no regclass constraints, just leave reg as is.
+  // In theory, we could inflate to the largest superclass of reg's existing
+  // class, but that might not be legal for the current cpu setting.
+  // This could happen if reg is only used by COPY instructions, so we may need
+  // to improve on this.
+  if (rcs.empty()) {
+    return;
+  }
+
+  // Compute the intersection of all classes in rcs.
+  // This ought to be independent of iteration order, but if the target register
+  // classes don't form a proper algebra, it is possible to get different
+  // results. The solution is to make sure the intersection of any two register
+  // classes is also a register class or the null set.
+  const TargetRegisterClass *rc = 0;
+  for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(),
+       E = rcs.end(); I != E; ++I) {
+    rc = rc ?
getCommonSubClass(rc, *I) : *I; + assert(rc && "Incompatible regclass constraints found"); + } + + if (rc == orc) + return; + DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to " + << rc->getName() <<".\n"); + mri.setRegClass(reg, rc); +} diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp new file mode 100644 index 0000000..62ad817 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -0,0 +1,177 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an array of argument values, +/// incorporating info about the formals into this state. +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. +bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. 
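+  // Unlike AnalyzeReturn below, an unhandled value is not a fatal error
+  // here; returning false tells the caller to fall back to sret demotion.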
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + +/// AnalyzeReturn - Analyze the returned values of a return, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { +#ifndef NDEBUG + dbgs() << "Call result has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } +} diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp new file mode 100644 index 0000000..91a9536 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp @@ -0,0 +1,425 @@ +//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to the target-specific alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoopsAligned,  "Number of loops aligned");
+STATISTIC(NumIntraElim,     "Number of intra loop branches eliminated");
+STATISTIC(NumIntraMoved,    "Number of intra loop branches moved");
+
+namespace {
+  class CodePlacementOpt : public MachineFunctionPass {
+    const MachineLoopInfo *MLI;
+    const TargetInstrInfo *TII;
+    const TargetLowering  *TLI;
+
+  public:
+    static char ID;
+    CodePlacementOpt() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const {
+      return "Code Placement Optimizer";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    bool HasFallthrough(MachineBasicBlock *MBB);
+    bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
+    void Splice(MachineFunction &MF,
+                MachineFunction::iterator InsertPt,
+                MachineFunction::iterator Begin,
+                MachineFunction::iterator End);
+    bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+                                          MachineLoop *L);
+    bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+                                     MachineLoop *L);
+    bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
+    bool OptimizeIntraLoopEdges(MachineFunction &MF);
+    bool AlignLoops(MachineFunction &MF);
+    bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
+  };
+
+  char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+  return new CodePlacementOpt();
+}
+
+/// HasFallthrough - Test whether the given block has a fallthrough, either as
+/// a plain fallthrough or as a fallthrough case of a conditional branch.
+///
+bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+    return false;
+  // This conditional branch has no fallthrough.
+  if (FBB)
+    return false;
+  // An unconditional branch has no fallthrough.
+  if (Cond.empty() && TBB)
+    return false;
+  // It has a fallthrough.
+  return true;
+}
+
+/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
+/// This is called before major changes are begun to test whether it will be
+/// possible to complete the changes.
+///
+/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
+/// whenever possible.
+///
+bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
+  // Conservatively ignore EH landing pads.
+  if (MBB->isLandingPad()) return false;
+
+  // Aggressively handle return blocks and similar constructs.
+  if (MBB->succ_empty()) return true;
+
+  // Ask the target's AnalyzeBranch if it can handle this block.
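+  // A true result from AnalyzeBranch means the terminator could not be
+  // analyzed, so the block must be treated as unmovable.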
+ MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + // Make sure the terminator is understood. + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // Ignore blocks which look like they might have EH-related control flow. + // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't + // recognize the possibility of a control transfer through an unwind. + // Such blocks contain EH_LABEL instructions, however they may be in the + // middle of the block. Instead of searching for them, just check to see + // if the CFG disagrees with AnalyzeBranch. + if (1u + !Cond.empty() != MBB->succ_size()) + return false; + // Make sure we have the option of reversing the condition. + if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) + return false; + return true; +} + +/// Splice - Move the sequence of instructions [Begin,End) to just before +/// InsertPt. Update branch instructions as needed to account for broken +/// fallthrough edges and to take advantage of newly exposed fallthrough +/// opportunities. +/// +void CodePlacementOpt::Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End) { + assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && + "Splice can't change the entry block!"); + MachineFunction::iterator OldBeginPrior = prior(Begin); + MachineFunction::iterator OldEndPrior = prior(End); + + MF.splice(InsertPt, Begin, End); + + prior(Begin)->updateTerminator(); + OldBeginPrior->updateTerminator(); + OldEndPrior->updateTerminator(); +} + +/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump +/// to the loop top to the top of the loop so that they have a fall through. +/// This can introduce a branch on entry to the loop, but it can eliminate a +/// branch within the loop. See the @simple case in +/// test/CodeGen/X86/loop_blocks.ll for an example of this. +bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + MachineBasicBlock *TopMBB = L->getTopBlock(); + + bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); + + if (TopMBB == MF.begin() || + HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { + new_top: + for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), + PE = TopMBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (Pred == TopMBB) continue; + if (HasFallthrough(Pred)) continue; + if (!L->contains(Pred)) continue; + + // Verify that we can analyze all the loop entry edges before beginning + // any changes which will require us to be able to analyze them. + if (Pred == MF.begin()) + continue; + if (!HasAnalyzableTerminator(Pred)) + continue; + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) + continue; + + // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() + << " to top of loop.\n"); + Changed = true; + + // Move it and all the blocks that can reach it via fallthrough edges + // exclusively, to keep existing fallthrough edges intact. + MachineFunction::iterator Begin = Pred; + MachineFunction::iterator End = llvm::next(Begin); + while (Begin != MF.begin()) { + MachineFunction::iterator Prior = prior(Begin); + if (Prior == MF.begin()) + break; + // Stop when a non-fallthrough edge is found. + if (!HasFallthrough(Prior)) + break; + // Stop if a block which could fall-through out of the loop is found. 
+        if (Prior->isSuccessor(End))
+          break;
+        // If we've reached the top, stop scanning.
+        if (Prior == MachineFunction::iterator(TopMBB)) {
+          // We know top currently has a fall through (because we just checked
+          // it) which would be lost if we do the transformation, so it isn't
+          // worthwhile to do the transformation unless it would expose a new
+          // fallthrough edge.
+          if (!Prior->isSuccessor(End))
+            goto next_pred;
+          // Otherwise we can stop scanning and proceed to move the blocks.
+          break;
+        }
+        // If we hit a switch or something complicated, don't move anything
+        // for this predecessor.
+        if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
+          break;
+        // Ok, the block prior to Begin will be moved along with the rest.
+        // Extend the range to include it.
+        Begin = Prior;
+        ++NumIntraMoved;
+      }
+
+      // Move the blocks.
+      Splice(MF, TopMBB, Begin, End);
+
+      // Update TopMBB.
+      TopMBB = L->getTopBlock();
+
+      // We have a new loop top. Iterate on it. We shouldn't have to do this
+      // too many times if BranchFolding has done a reasonable job.
+      goto new_top;
+    next_pred:;
+    }
+  }
+
+  // If the loop previously didn't exit with a fall-through and it now does,
+  // we eliminated a branch.
+  if (Changed &&
+      !BotHasFallthrough &&
+      HasFallthrough(L->getBottomBlock())) {
+    ++NumIntraElim;
+  }
+
+  return Changed;
+}
+
+/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
+/// portion of the loop contiguous with the header. This usually makes the loop
+/// contiguous, provided that AnalyzeBranch can handle all the relevant
+/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
+/// for an example of this.
+bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+                                                   MachineLoop *L) {
+  bool Changed = false;
+  MachineBasicBlock *TopMBB = L->getTopBlock();
+  MachineBasicBlock *BotMBB = L->getBottomBlock();
+
+  // Determine a position to move orphaned loop blocks to. If TopMBB is not
+  // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
+  // to the top of the loop to avoid losing that fallthrough. Otherwise append
+  // them to the bottom, even if it previously had a fallthrough, on the theory
+  // that it's worth an extra branch to keep the loop contiguous.
+  MachineFunction::iterator InsertPt =
+    llvm::next(MachineFunction::iterator(BotMBB));
+  bool InsertAtTop = false;
+  if (TopMBB != MF.begin() &&
+      !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
+      HasFallthrough(BotMBB)) {
+    InsertPt = TopMBB;
+    InsertAtTop = true;
+  }
+
+  // Keep a record of which blocks are in the portion of the loop contiguous
+  // with the loop header.
+  SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
+  for (MachineFunction::iterator I = TopMBB,
+       E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+    ContiguousBlocks.insert(I);
+
+  // Find non-contiguous blocks and fix them.
+  if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
+    for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
+         BI != BE; ++BI) {
+      MachineBasicBlock *BB = *BI;
+
+      // Verify that we can analyze all the loop entry edges before beginning
+      // any changes which will require us to be able to analyze them.
+      if (!HasAnalyzableTerminator(BB))
+        continue;
+      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
+        continue;
+
+      // If the layout predecessor is part of the loop, this block will be
+      // processed along with it. This keeps them in their relative order.
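+      // (Only the first block of each maximal run of loop blocks is selected
+      // here; the scan below then extends End so the whole run is moved by a
+      // single Splice call.)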
+      if (BB != MF.begin() &&
+          L->contains(prior(MachineFunction::iterator(BB))))
+        continue;
+
+      // Check to see if this block is already contiguous with the main
+      // portion of the loop.
+      if (!ContiguousBlocks.insert(BB))
+        continue;
+
+      // Move the block.
+      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
+                   << " to be contiguous with loop.\n");
+      Changed = true;
+
+      // Process this block and all loop blocks contiguous with it, to keep
+      // them in their relative order.
+      MachineFunction::iterator Begin = BB;
+      MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
+      for (; End != MF.end(); ++End) {
+        if (!L->contains(End)) break;
+        if (!HasAnalyzableTerminator(End)) break;
+        ContiguousBlocks.insert(End);
+        ++NumIntraMoved;
+      }
+
+      // If we're inserting at the bottom of the loop, and the code we're
+      // moving originally had fall-through successors, bring the successors
+      // up with the loop blocks to preserve the fall-through edges.
+      if (!InsertAtTop)
+        for (; End != MF.end(); ++End) {
+          if (L->contains(End)) break;
+          if (!HasAnalyzableTerminator(End)) break;
+          if (!HasFallthrough(prior(End))) break;
+        }
+
+      // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
+      // we don't need them anymore at this point.
+      Splice(MF, InsertPt, Begin, End);
+    }
+
+  return Changed;
+}
+
+/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+/// This code takes the approach of making minor changes to the existing
+/// layout to fix specific loop-oriented problems. Also, it depends on
+/// AnalyzeBranch, which can't understand complex control instructions.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
+                                                        MachineLoop *L) {
+  bool Changed = false;
+
+  // Do optimization for nested loops.
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+  // Do optimization for this loop.
+  Changed |= EliminateUnconditionalJumpsToTop(MF, L);
+  Changed |= MoveDiscontiguousLoopBlocks(MF, L);
+
+  return Changed;
+}
+
+/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
+  bool Changed = false;
+
+  if (!TLI->shouldOptimizeCodePlacement())
+    return Changed;
+
+  // Do optimization for each loop in the function.
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+       I != E; ++I)
+    if (!(*I)->getParentLoop())
+      Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+  return Changed;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  if (F->hasFnAttr(Attribute::OptimizeForSize))
+    return false;
+
+  unsigned Align = TLI->getPrefLoopAlignment();
+  if (!Align)
+    return false;  // Don't care about loop alignment.
+
+  bool Changed = false;
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+       I != E; ++I)
+    Changed |= AlignLoop(MF, *I, Align);
+
+  return Changed;
+}
+
+/// AlignLoop - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
+                                 unsigned Align) {
+  bool Changed = false;
+
+  // Do alignment for nested loops.
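+  // (Nested loops are aligned first; the unconditional setAlignment call
+  // below then aligns this loop's own header, so every header in the nest
+  // ends up aligned.)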
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    Changed |= AlignLoop(MF, *I, Align);
+
+  L->getTopBlock()->setAlignment(Align);
+  Changed = true;
+  ++NumLoopsAligned;
+
+  return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+  MLI = &getAnalysis<MachineLoopInfo>();
+  if (MLI->empty())
+    return false;  // No loops.
+
+  TLI = MF.getTarget().getTargetLowering();
+  TII = MF.getTarget().getInstrInfo();
+
+  bool Changed = OptimizeIntraLoopEdges(MF);
+
+  Changed |= AlignLoops(MF);
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 0000000..335d2d8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,609 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi) :
+  AntiDepBreaker(), MF(MFi),
+  MRI(MF.getRegInfo()),
+  TII(MF.getTarget().getInstrInfo()),
+  TRI(MF.getTarget().getRegisterInfo()),
+  AllocatableSet(TRI->getAllocatableSet(MF)),
+  Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+  KillIndices(TRI->getNumRegs(), 0),
+  DefIndices(TRI->getNumRegs(), 0) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+  const unsigned BBSize = BB->size();
+  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+    // Clear out the register class data.
+    Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+    // Initialize the indices to indicate that no registers are live.
+    KillIndices[i] = ~0u;
+    DefIndices[i] = BBSize;
+  }
+
+  // Clear "do not change" set.
+  KeepRegs.clear();
+
+  bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
+  // Determine the live-out physregs for this block.
+  if (IsReturnBlock) {
+    // In a return block, examine the function live-out regs.
+    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+         E = MRI.liveout_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      KillIndices[Reg] = BB->size();
+      DefIndices[Reg] = ~0u;
+
+      // Repeat, for all aliases.
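+      // (getAliasSet enumerates every physical register that overlaps Reg;
+      // on X86, for example, EAX's aliases include AX, AH and AL. Marking
+      // them keeps the liveness arrays consistent across overlapping
+      // registers.)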
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +void CriticalAntiDepBreaker::FinishBlock() { + RegRefs.clear(); + KeepRegs.clear(); +} + +void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + if (MI->isDebugValue()) + return; + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) + if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); + + // Mark this register to be non-renamable. + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Move the def index to the end of the previous region, to reflect + // that the def could theoretically have been scheduled at the end. + DefIndices[Reg] = InsertPosIndex; + } + + PrescanInstruction(MI); + ScanInstruction(MI, Count); +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. 
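+  // (getDepth is the longest-latency path from the start of the scheduling
+  // region to the node, so the deepest predecessor plus its edge latency
+  // extends the bottom-up critical path by exactly one step.)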
+  for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+       P != PE; ++P) {
+    const SUnit *PredSU = P->getSUnit();
+    unsigned PredLatency = P->getLatency();
+    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+    // In the case of a latency tie, prefer an anti-dependency edge over
+    // other types of edges.
+    if (NextDepth < PredTotalLatency ||
+        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+      NextDepth = PredTotalLatency;
+      Next = &*P;
+    }
+  }
+  return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+  // It's not safe to change register allocation for source operands of
+  // instructions that have special allocation requirements. Also assume all
+  // registers used in a call must not be changed (ABI).
+  // FIXME: The issue with predicated instructions is more complex. We are
+  // being conservative here because the kill markers cannot be trusted after
+  // if-conversion:
+  // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // ...
+  // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+  // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+  // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  //
+  // The first R6 kill is not really a kill since it's killed by a predicated
+  // instruction which may not be executed. The second R6 def may or may not
+  // re-define R6 so it's not safe to change it since the last R6 use cannot be
+  // changed.
+  bool Special = MI->getDesc().isCall() ||
+    MI->getDesc().hasExtraSrcRegAllocReq() ||
+    TII->isPredicated(MI);
+
+  // Scan the register operands for this instruction and update
+  // Classes and RegRefs.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    const TargetRegisterClass *NewRC = 0;
+
+    if (i < MI->getDesc().getNumOperands())
+      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    // Now check for aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      // If an alias of the reg is used during the live range, give up.
+      // Note that this allows us to skip checking if AntiDepReg
+      // overlaps with any of the aliases, among other things.
+      unsigned AliasReg = *Alias;
+      if (Classes[AliasReg]) {
+        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
+    }
+
+    // If we're still willing to consider this register, note the reference.
+    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+      RegRefs.insert(std::make_pair(Reg, &MO));
+
+    if (MO.isUse() && Special) {
+      if (KeepRegs.insert(Reg)) {
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg)
+          KeepRegs.insert(*Subreg);
+      }
+    }
+  }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+                                             unsigned Count) {
+  // Update liveness.
+  // Proceeding upwards, registers that are def'd but not used in this
+  // instruction are now dead.
+
+  if (!TII->isPredicated(MI)) {
+    // Predicated defs are modeled as read + write, i.e. similar to two
+    // address updates.
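+    // (Hence for a predicated instruction the def-processing below is
+    // skipped entirely; the unconditional use-scan that follows still
+    // records its reads.)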
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+      if (!MO.isDef()) continue;
+      // Ignore two-addr defs.
+      if (MI->isRegTiedToUseOperand(i)) continue;
+
+      DefIndices[Reg] = Count;
+      KillIndices[Reg] = ~0u;
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+      KeepRegs.erase(Reg);
+      Classes[Reg] = 0;
+      RegRefs.erase(Reg);
+      // Repeat, for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        unsigned SubregReg = *Subreg;
+        DefIndices[SubregReg] = Count;
+        KillIndices[SubregReg] = ~0u;
+        KeepRegs.erase(SubregReg);
+        Classes[SubregReg] = 0;
+        RegRefs.erase(SubregReg);
+      }
+      // Conservatively mark super-registers as unusable.
+      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+           *Super; ++Super) {
+        unsigned SuperReg = *Super;
+        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
+    }
+  }
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    if (!MO.isUse()) continue;
+
+    const TargetRegisterClass *NewRC = 0;
+    if (i < MI->getDesc().getNumOperands())
+      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It wasn't previously live but now it is; this is a kill.
+    if (KillIndices[Reg] == ~0u) {
+      KillIndices[Reg] = Count;
+      DefIndices[Reg] = ~0u;
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+    }
+    // Repeat, for all aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      if (KillIndices[AliasReg] == ~0u) {
+        KillIndices[AliasReg] = Count;
+        DefIndices[AliasReg] = ~0u;
+      }
+    }
+  }
+}
+
+unsigned
+CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+                                                 unsigned AntiDepReg,
+                                                 unsigned LastNewReg,
+                                                 const TargetRegisterClass *RC)
+{
+  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+       RE = RC->allocation_order_end(MF); R != RE; ++R) {
+    unsigned NewReg = *R;
+    // Don't consider non-allocatable registers.
+    if (!AllocatableSet.test(NewReg)) continue;
+    // Don't replace a register with itself.
+    if (NewReg == AntiDepReg) continue;
+    // Don't replace a register with one that was recently used to repair
+    // an anti-dependence with this AntiDepReg, because that would
+    // re-introduce that anti-dependence.
+    if (NewReg == LastNewReg) continue;
+    // If the instruction already has a def of the NewReg, it's not suitable.
+    // For example, an instruction with multiple definitions can result in
+    // this condition.
+    if (MI->modifiesRegister(NewReg, TRI)) continue;
+    // If NewReg is dead and NewReg's most recent def is not before
+    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
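+    // ("Dead" here means KillIndices[NewReg] == ~0u; the index comparison
+    // below rejects a NewReg whose most recent def is before AntiDepReg's
+    // kill.)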
+    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+           && "Kill and Def maps aren't consistent for AntiDepReg!");
+    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+           && "Kill and Def maps aren't consistent for NewReg!");
+    if (KillIndices[NewReg] != ~0u ||
+        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+        KillIndices[AntiDepReg] > DefIndices[NewReg])
+      continue;
+    return NewReg;
+  }
+
+  // No registers are free and available!
+  return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                      MachineBasicBlock::iterator Begin,
+                      MachineBasicBlock::iterator End,
+                      unsigned InsertPosIndex) {
+  // The code below assumes that there is at least one instruction,
+  // so just duck out immediately if the block is empty.
+  if (SUnits.empty()) return 0;
+
+  // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+  // This is used for updating debug information.
+  DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
+  // Find the node at the bottom of the critical path.
+  const SUnit *Max = 0;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    const SUnit *SU = &SUnits[i];
+    MISUnitMap[SU->getInstr()] = SU;
+    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+      Max = SU;
+  }
+
+#ifndef NDEBUG
+  {
+    DEBUG(dbgs() << "Critical path has total latency "
+          << (Max->getDepth() + Max->Latency) << "\n");
+    DEBUG(dbgs() << "Available regs:");
+    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+      if (KillIndices[Reg] == ~0u)
+        DEBUG(dbgs() << " " << TRI->getName(Reg));
+    }
+    DEBUG(dbgs() << '\n');
+  }
+#endif
+
+  // Track progress along the critical path through the SUnit graph as we walk
+  // the instructions.
+  const SUnit *CriticalPathSU = Max;
+  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+  // Consider this pattern:
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  // There are three anti-dependencies here, and without special care,
+  // we'd break all of them using the same register:
+  //   A = ...
+  //   ... = A
+  //   B = ...
+  //   ... = B
+  //   B = ...
+  //   ... = B
+  //   B = ...
+  //   ... = B
+  // because at each anti-dependence, B is the first register that
+  // isn't A which is free. This re-introduces anti-dependencies
+  // at all but one of the original anti-dependencies that we were
+  // trying to break. To avoid this, keep track of the most recent
+  // register that each register was replaced with, and avoid
+  // using it to repair an anti-dependence on the same register.
+  // This lets us produce this:
+  //   A = ...
+  //   ... = A
+  //   B = ...
+  //   ... = B
+  //   C = ...
+  //   ... = C
+  //   B = ...
+  //   ... = B
+  // This still has an anti-dependence on B, but at least it isn't on the
+  // original critical path.
+  //
+  // TODO: If we tracked more than one register here, we could potentially
+  // fix that remaining critical edge too. This is a little more involved,
+  // because unlike the most recent register, less recent registers should
+  // still be considered, though only if no other registers are available.
+  std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+  // Attempt to break anti-dependence edges on the critical path. Walk the
+  // instructions from the bottom up, tracking information about liveness
+  // as we go to help determine which registers are available.
+  unsigned Broken = 0;
+  unsigned Count = InsertPosIndex - 1;
+  for (MachineBasicBlock::iterator I = End, E = Begin;
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+    if (MI->isDebugValue())
+      continue;
+
+    // Check if this instruction has a dependence on the critical path that
+    // is an anti-dependence that we may be able to break. If it is, set
+    // AntiDepReg to the non-zero register associated with the anti-dependence.
+    //
+    // We limit our attention to the critical path as a heuristic to avoid
+    // breaking anti-dependence edges that aren't going to significantly
+    // impact the overall schedule. There are a limited number of registers
+    // and we want to save them for the important edges.
+    //
+    // TODO: Instructions with multiple defs could have multiple
+    // anti-dependencies. The current code here only knows how to break one
+    // edge per instruction. Note that we'd have to be able to break all of
+    // the anti-dependencies in an instruction in order to be effective.
+    unsigned AntiDepReg = 0;
+    if (MI == CriticalPathMI) {
+      if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+        const SUnit *NextSU = Edge->getSUnit();
+
+        // Only consider anti-dependence edges.
+        if (Edge->getKind() == SDep::Anti) {
+          AntiDepReg = Edge->getReg();
+          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+          if (!AllocatableSet.test(AntiDepReg))
+            // Don't break anti-dependencies on non-allocatable registers.
+            AntiDepReg = 0;
+          else if (KeepRegs.count(AntiDepReg))
+            // Don't break anti-dependencies if a use down below requires
+            // this exact register.
+            AntiDepReg = 0;
+          else {
+            // If the SUnit has other dependencies on the SUnit that it
+            // anti-depends on, don't bother breaking the anti-dependency
+            // since those edges would prevent such units from being
+            // scheduled past each other regardless.
+            //
+            // Also, if there are dependencies on other SUnits with the
+            // same register as the anti-dependency, don't attempt to
+            // break it.
+            for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+              if (P->getSUnit() == NextSU ?
+                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+                AntiDepReg = 0;
+                break;
+              }
+          }
+        }
+        CriticalPathSU = NextSU;
+        CriticalPathMI = CriticalPathSU->getInstr();
+      } else {
+        // We've reached the end of the critical path.
+        CriticalPathSU = 0;
+        CriticalPathMI = 0;
+      }
+    }
+
+    PrescanInstruction(MI);
+
+    // If MI's defs have a special allocation requirement, don't allow
+    // any def registers to be changed. Also assume all registers
+    // defined in a call must not be changed (ABI).
+    if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+        TII->isPredicated(MI))
+      // If this instruction's defs have a special allocation requirement,
+      // don't break this anti-dependency.
+      AntiDepReg = 0;
+    else if (AntiDepReg) {
+      // If this instruction has a use of AntiDepReg, breaking it
+      // is invalid.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg()) continue;
+        unsigned Reg = MO.getReg();
+        if (Reg == 0) continue;
+        if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+          AntiDepReg = 0;
+          break;
+        }
+      }
+    }
+
+    // Determine AntiDepReg's register class, if it is live and is
+    // consistently used within a single class.
+    const TargetRegisterClass *RC = AntiDepReg != 0 ?
+      Classes[AntiDepReg] : 0;
+    assert((AntiDepReg == 0 || RC != NULL) &&
+           "Register should be live if it's causing an anti-dependence!");
+    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+      AntiDepReg = 0;
+
+    // Look for a suitable register to use to break the anti-dependence.
+    //
+    // TODO: Instead of picking the first free register, consider which might
+    // be the best.
+    if (AntiDepReg != 0) {
+      if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
+                                                     LastNewReg[AntiDepReg],
+                                                     RC)) {
+        DEBUG(dbgs() << "Breaking anti-dependence edge on "
+              << TRI->getName(AntiDepReg)
+              << " with " << RegRefs.count(AntiDepReg) << " references"
+              << " using " << TRI->getName(NewReg) << "!\n");
+
+        // Update the references to the old register to refer to the new
+        // register.
+        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+                  std::multimap<unsigned, MachineOperand *>::iterator>
+           Range = RegRefs.equal_range(AntiDepReg);
+        for (std::multimap<unsigned, MachineOperand *>::iterator
+             Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+          Q->second->setReg(NewReg);
+          // If the SU for the instruction being updated has debug information
+          // related to the anti-dependency register, make sure to update that
+          // as well.
+          const SUnit *SU = MISUnitMap[Q->second->getParent()];
+          if (!SU) continue;
+          for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
+            MachineInstr *DI = SU->DbgInstrList[i];
+            assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
+                    DI->getOperand(0).getReg()
+                    && "Non register dbg_value attached to SUnit!");
+            if (DI->getOperand(0).getReg() == AntiDepReg)
+              DI->getOperand(0).setReg(NewReg);
+          }
+        }
+
+        // We just went back in time and modified history; the
+        // liveness information for the anti-dependence reg is now
+        // inconsistent. Set the state as if it were dead.
+        Classes[NewReg] = Classes[AntiDepReg];
+        DefIndices[NewReg] = DefIndices[AntiDepReg];
+        KillIndices[NewReg] = KillIndices[AntiDepReg];
+        assert(((KillIndices[NewReg] == ~0u) !=
+                (DefIndices[NewReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for NewReg!");
+
+        Classes[AntiDepReg] = 0;
+        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+        KillIndices[AntiDepReg] = ~0u;
+        assert(((KillIndices[AntiDepReg] == ~0u) !=
+                (DefIndices[AntiDepReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for AntiDepReg!");
+
+        RegRefs.erase(AntiDepReg);
+        LastNewReg[AntiDepReg] = NewReg;
+        ++Broken;
+      }
+    }
+
+    ScanInstruction(MI, Count);
+  }
+
+  return Broken;
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 0000000..0ed7c35
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,100 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+  class CriticalAntiDepBreaker : public AntiDepBreaker {
+    MachineFunction& MF;
+    MachineRegisterInfo &MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+
+    /// AllocatableSet - The set of allocatable registers.
+    /// We'll be ignoring anti-dependencies on non-allocatable registers,
+    /// because they may not be safe to break.
+    const BitVector AllocatableSet;
+
+    /// Classes - For live regs that are only used in one register class in a
+    /// live range, the register class. If the register is not live, the
+    /// corresponding value is null. If the register is live but used in
+    /// multiple register classes, the corresponding value is -1 casted to a
+    /// pointer.
+    std::vector<const TargetRegisterClass*> Classes;
+
+    /// RegRefs - Map registers to all their references within a live range.
+    std::multimap<unsigned, MachineOperand *> RegRefs;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
+    std::vector<unsigned> KillIndices;
+
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom-up), or ~0u if the register is live.
+    std::vector<unsigned> DefIndices;
+
+    /// KeepRegs - A set of registers which are live and cannot be changed to
+    /// break anti-dependencies.
+    SmallSet<unsigned, 4> KeepRegs;
+
+  public:
+    CriticalAntiDepBreaker(MachineFunction& MFi);
+    ~CriticalAntiDepBreaker();
+
+    /// Start - Initialize anti-dep breaking for a new basic block.
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+    /// path of the ScheduleDAG and break them by renaming registers.
+    ///
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
+                                   unsigned InsertPosIndex);
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+    /// Finish - Finish anti-dep breaking for a basic block.
+    void FinishBlock();
+
+  private:
+    void PrescanInstruction(MachineInstr *MI);
+    void ScanInstruction(MachineInstr *MI, unsigned Count);
+    unsigned findSuitableFreeRegister(MachineInstr *MI,
+                                      unsigned AntiDepReg,
+                                      unsigned LastNewReg,
+                                      const TargetRegisterClass *);
+  };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 0000000..318d922
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,188 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-dce"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+  class DeadMachineInstructionElim : public MachineFunctionPass {
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const TargetRegisterInfo *TRI;
+    const MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+    BitVector LivePhysRegs;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
+
+  private:
+    bool isDead(const MachineInstr *MI) const;
+  };
+}
+char DeadMachineInstructionElim::ID = 0;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+                "Remove dead machine instructions", false, false);
+
+FunctionPass *llvm::createDeadMachineInstructionElimPass() {
+  return new DeadMachineInstructionElim();
+}
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+  // Don't delete instructions with side effects.
+  bool SawStore = false;
+  if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
+    return false;
+
+  // Examine each operand.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef()) {
+      unsigned Reg = MO.getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
+          LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
+        // This def has a non-debug use. Don't delete the instruction!
+        return false;
+      }
+    }
+  }
+
+  // If there are no defs with uses, the instruction is dead.
+  return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+  bool AnyChanges = false;
+  MRI = &MF.getRegInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getTarget().getInstrInfo();
+
+  // Treat reserved registers as always live.
+  BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+  // Loop over all instructions in all blocks, from bottom to top, so that it's
+  // more likely that chains of dependent but ultimately dead instructions will
+  // be cleaned up.
+  for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+       I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+
+    // Start out assuming that reserved registers are live out of this block.
+    LivePhysRegs = ReservedRegs;
+
+    // Also add any explicit live-out physregs for this block.
+    if (!MBB->empty() && MBB->back().getDesc().isReturn())
+      for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
+           LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
+        unsigned Reg = *LOI;
+        if (TargetRegisterInfo::isPhysicalRegister(Reg))
+          LivePhysRegs.set(Reg);
+      }
+
+    // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs
+    // are not live across blocks, but some targets (x86) can have flags live
+    // out of a block.
+
+    // Now scan the instructions and delete dead ones, tracking physreg
+    // liveness as we go.
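+    // NOTE: a hypothetical dead chain illustrating why the walk is
+    // bottom-up:
+    //   %v1 = ADD %v0, 1    ; only use is the def of %v2
+    //   %v2 = MUL %v1, 2    ; no uses at all
+    // Visiting %v2's def first deletes it, which leaves %v1 unused, so %v1
+    // can be deleted in the same pass.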
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), + MIE = MBB->rend(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // If the instruction is dead, delete it! + if (isDead(MI)) { + DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } + AnyChanges = true; + MI->eraseFromParent(); + ++NumDeletes; + MIE = MBB->rend(); + // MII is now pointing to the next instruction to process, + // so don't increment it. + continue; + } + + // Record the physreg defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.reset(Reg); + // Check the subreg set, not the alias set, because a def + // of a super-register may still be partially live after + // this def. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) + LivePhysRegs.reset(*SubRegs); + } + } + } + // Record the physreg uses, after the defs, in case a physreg is + // both defined and used in the same instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.set(Reg); + for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + *AliasSet; ++AliasSet) + LivePhysRegs.set(*AliasSet); + } + } + } + + // We didn't delete the current instruction, so increment MII to + // the next one. + ++MII; + } + } + + LivePhysRegs.clear(); + return AnyChanges; +} diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp new file mode 100644 index 0000000..550fd3e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -0,0 +1,664 @@ +//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass mulches exception handling code into a form adapted to code +// generation. Required if using dwarf exception handling. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+
+namespace {
+  class DwarfEHPrepare : public FunctionPass {
+    const TargetMachine *TM;
+    const TargetLowering *TLI;
+
+    // The eh.exception intrinsic.
+    Function *ExceptionValueIntrinsic;
+
+    // The eh.selector intrinsic.
+    Function *SelectorIntrinsic;
+
+    // _Unwind_Resume_or_Rethrow call.
+    Constant *URoR;
+
+    // The EH language-specific catch-all type.
+    GlobalVariable *EHCatchAllValue;
+
+    // _Unwind_Resume or the target equivalent.
+    Constant *RewindFunction;
+
+    // We both use and preserve dominator info.
+    DominatorTree *DT;
+
+    // The function we are running on.
+    Function *F;
+
+    // The landing pads for this function.
+    typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+    BBSet LandingPads;
+
+    bool NormalizeLandingPads();
+    bool LowerUnwinds();
+    bool MoveExceptionValueCalls();
+
+    Instruction *CreateExceptionValueCall(BasicBlock *BB);
+
+    /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+    /// use the "llvm.eh.catch.all.value" call need to be converted to use its
+    /// initializer instead.
+    bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+    bool HasCatchAllInSelector(IntrinsicInst *);
+
+    /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+    void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+                                 SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
+
+    /// FindAllURoRInvokes - Find all URoR invokes in the function.
+    void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+    /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow"
+    /// calls. The "unwind" part of these invokes jumps to a landing pad within
+    /// the current function. This is a candidate to merge the selector
+    /// associated with the URoR invoke with the one from the URoR's landing
+    /// pad.
+    bool HandleURoRInvokes();
+
+    /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
+    /// with the eh.exception call. This recursively looks past instructions
+    /// which don't change the EH pointer value, like casts or PHI nodes.
+    bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+                             SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
+
+  public:
+    static char ID; // Pass identification, replacement for typeid.
+    DwarfEHPrepare(const TargetMachine *tm) :
+      FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+      ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+      URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
+
+    virtual bool runOnFunction(Function &Fn);
+
+    // getAnalysisUsage - We need the dominator tree for handling URoR.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DominatorTree>();
+      AU.addPreserved<DominatorTree>();
+    }
+
+    const char *getPassName() const {
+      return "Exception handling preparation";
+    }
+
+  };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+  return new DwarfEHPrepare(tm);
+}
+
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+  if (!EHCatchAllValue) return false;
+
+  unsigned ArgIdx = II->getNumArgOperands() - 1;
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+  return GV == EHCatchAllValue;
+}
+
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+                        SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
+  for (Value::use_iterator
+         I = SelectorIntrinsic->use_begin(),
+         E = SelectorIntrinsic->use_end(); I != E; ++I) {
+    IntrinsicInst *II = cast<IntrinsicInst>(*I);
+
+    if (II->getParent()->getParent() != F)
+      continue;
+
+    if (!HasCatchAllInSelector(II))
+      Sels.insert(II);
+    else
+      CatchAllSels.insert(II);
+  }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+  for (Value::use_iterator
+         I = URoR->use_begin(),
+         E = URoR->use_end(); I != E; ++I) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
+      URoRInvokes.insert(II);
+  }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+/// use the "llvm.eh.catch.all.value" call need to be converted to use its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+  if (!EHCatchAllValue) return false;
+
+  if (!SelectorIntrinsic) {
+    SelectorIntrinsic =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+    if (!SelectorIntrinsic) return false;
+  }
+
+  bool Changed = false;
+  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+         I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+    IntrinsicInst *Sel = *I;
+
+    // Index of the "llvm.eh.catch.all.value" variable.
+    unsigned OpIdx = Sel->getNumArgOperands() - 1;
+    GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
+    if (GV != EHCatchAllValue) continue;
+    Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call, and indicate whether there is a URoR "invoke" associated
+/// with it. This recursively looks past instructions which don't change the
+/// EH pointer value, like casts or PHI nodes.
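+// NOTE: schematically (hypothetical IR for the old-style EH intrinsics), the
+// recursion below follows chains such as
+//   %exn = call i8* @llvm.eh.exception()
+//   %p   = phi i8* [ %exn, %lpad1 ], [ %exn, %lpad2 ]
+//   %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %p, ...)
+// looking through PHI nodes and casts from %exn until the eh.selector calls
+// (and any URoR invoke fed by the same pointer) are found.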
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+                                    SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
+  SmallPtrSet<PHINode*, 32> SeenPHIs;
+  bool Changed = false;
+
+  for (Value::use_iterator
+         I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+    Instruction *II = dyn_cast<Instruction>(*I);
+    if (!II || II->getParent()->getParent() != F) continue;
+
+    if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+      if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+        SelCalls.insert(Sel);
+    } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+      if (Invoke->getCalledFunction() == URoR)
+        URoRInvoke = true;
+    } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
+    } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+      if (SeenPHIs.insert(PN))
+        // Don't process a PHI node more than once.
+        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
+    }
+  }
+
+  return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls.
+/// The "unwind" part of these invokes jumps to a landing pad within the
+/// current function. This is a candidate to merge the selector associated
+/// with the URoR invoke with the one from the URoR's landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+  if (!EHCatchAllValue) {
+    EHCatchAllValue =
+      F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
+    if (!EHCatchAllValue) return false;
+  }
+
+  if (!SelectorIntrinsic) {
+    SelectorIntrinsic =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+    if (!SelectorIntrinsic) return false;
+  }
+
+  SmallPtrSet<IntrinsicInst*, 32> Sels;
+  SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+  FindAllCleanupSelectors(Sels, CatchAllSels);
+
+  if (!URoR) {
+    URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+    if (!URoR) return CleanupSelectors(CatchAllSels);
+  }
+
+  SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+  FindAllURoRInvokes(URoRInvokes);
+
+  SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+         SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+    const BasicBlock *SelBB = (*SI)->getParent();
+    for (SmallPtrSet<InvokeInst*, 32>::iterator
+           UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+      const BasicBlock *URoRBB = (*UI)->getParent();
+      if (DT->dominates(SelBB, URoRBB)) {
+        SelsToConvert.insert(*SI);
+        break;
+      }
+    }
+  }
+
+  bool Changed = false;
+
+  if (Sels.size() != SelsToConvert.size()) {
+    // If we haven't been able to convert all of the clean-up selectors, then
+    // loop through the slow way to see if they still need to be converted.
+    if (!ExceptionValueIntrinsic) {
+      ExceptionValueIntrinsic =
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+      if (!ExceptionValueIntrinsic)
+        return CleanupSelectors(CatchAllSels);
+    }
+
+    for (Value::use_iterator
+           I = ExceptionValueIntrinsic->use_begin(),
+           E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+      IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
+      if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+      bool URoRInvoke = false;
+      SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
+
+      if (URoRInvoke) {
+        // This EH pointer is being used by an invoke of a URoR instruction
+        // and an eh.selector intrinsic call. If the eh.selector is a
+        // 'clean-up', we need to convert it to a 'catch-all'.
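+        // NOTE (assumed encoding, for orientation): a clean-up selector is
+        // one whose final operand is not the catch-all sentinel, e.g.
+        //   call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* %pers, i32 0)
+        // while a catch-all selector's final operand is the
+        // @llvm.eh.catch.all.value global (or its initializer).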
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator + SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) + if (!HasCatchAllInSelector(*SI)) + SelsToConvert.insert(*SI); + } + } + } + + if (!SelsToConvert.empty()) { + // Convert all clean-up eh.selectors, which are associated with "invokes" of + // URoR calls, into catch-all eh.selectors. + Changed = true; + + for (SmallPtrSet<IntrinsicInst*, 8>::iterator + SI = SelsToConvert.begin(), SE = SelsToConvert.end(); + SI != SE; ++SI) { + IntrinsicInst *II = *SI; + + // Use the exception object pointer and the personality function + // from the original selector. + CallSite CS(II); + IntrinsicInst::op_iterator I = CS.arg_begin(); + IntrinsicInst::op_iterator E = CS.arg_end(); + IntrinsicInst::op_iterator B = prior(E); + + // Exclude last argument if it is an integer. + if (isa<ConstantInt>(B)) E = B; + + // Add exception object pointer (front). + // Add personality function (next). + // Add in any filter IDs (rest). + SmallVector<Value*, 8> Args(I, E); + + Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. + + CallInst *NewSelector = + CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(), + "eh.sel.catch.all", II); + + NewSelector->setTailCall(II->isTailCall()); + NewSelector->setAttributes(II->getAttributes()); + NewSelector->setCallingConv(II->getCallingConv()); + + II->replaceAllUsesWith(NewSelector); + II->eraseFromParent(); + } + } + + Changed |= CleanupSelectors(CatchAllSels); + return Changed; +} + +/// NormalizeLandingPads - Normalize and discover landing pads, noting them +/// in the LandingPads set. A landing pad is normal if the only CFG edges +/// that end at it are unwind edges from invoke instructions. If we inlined +/// through an invoke we could have a normal branch from the previous +/// unwind block through to the landing pad for the original invoke. +/// Abnormal landing pads are fixed up by redirecting all unwind edges to +/// a new basic block which falls through to the original. +bool DwarfEHPrepare::NormalizeLandingPads() { + bool Changed = false; + + const MCAsmInfo *MAI = TM->getMCAsmInfo(); + bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (!isa<InvokeInst>(TI)) + continue; + BasicBlock *LPad = TI->getSuccessor(1); + // Skip landing pads that have already been normalized. + if (LandingPads.count(LPad)) + continue; + + // Check that only invoke unwind edges end at the landing pad. + bool OnlyUnwoundTo = true; + bool SwitchOK = usingSjLjEH; + for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); + PI != PE; ++PI) { + TerminatorInst *PT = (*PI)->getTerminator(); + // The SjLj dispatch block uses a switch instruction. This is effectively + // an unwind edge, so we can disregard it here. There will only ever + // be one dispatch, however, so if there are multiple switches, one + // of them truly is a normal edge, not an unwind edge. + if (SwitchOK && isa<SwitchInst>(PT)) { + SwitchOK = false; + continue; + } + if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { + OnlyUnwoundTo = false; + break; + } + } + + if (OnlyUnwoundTo) { + // Only unwind edges lead to the landing pad. Remember the landing pad. + LandingPads.insert(LPad); + continue; + } + + // At least one normal edge ends at the landing pad. Redirect the unwind + // edges to a new basic block which falls through into this one. + + // Create the new basic block. 
+ BasicBlock *NewBB = BasicBlock::Create(F->getContext(), + LPad->getName() + "_unwind_edge"); + + // Insert it into the function right before the original landing pad. + LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); + + // Redirect unwind edges from the original landing pad to NewBB. + for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { + TerminatorInst *PT = (*PI++)->getTerminator(); + if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) + // Unwind to the new block. + PT->setSuccessor(1, NewBB); + } + + // If there are any PHI nodes in LPad, we need to update them so that they + // merge incoming values from NewBB instead. + for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { + PHINode *PN = cast<PHINode>(II); + pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); + + // Check to see if all of the values coming in via unwind edges are the + // same. If so, we don't need to create a new PHI node. + Value *InVal = PN->getIncomingValueForBlock(*PB); + for (pred_iterator PI = PB; PI != PE; ++PI) { + if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { + InVal = 0; + break; + } + } + + if (InVal == 0) { + // Different unwind edges have different values. Create a new PHI node + // in NewBB. + PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", + NewBB); + // Add an entry for each unwind edge, using the value from the old PHI. + for (pred_iterator PI = PB; PI != PE; ++PI) + NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); + + // Now use this new PHI as the common incoming value for NewBB in PN. + InVal = NewPN; + } + + // Revector exactly one entry in the PHI node to come from NewBB + // and delete all other entries that come from unwind edges. If + // there are both normal and unwind edges from the same predecessor, + // this leaves an entry for the normal edge. + for (pred_iterator PI = PB; PI != PE; ++PI) + PN->removeIncomingValue(*PI); + PN->addIncoming(InVal, NewBB); + } + + // Add a fallthrough from NewBB to the original landing pad. + BranchInst::Create(LPad, NewBB); + + // Now update DominatorTree analysis information. + DT->splitBlock(NewBB); + + // Remember the newly constructed landing pad. The original landing pad + // LPad is no longer a landing pad now that all unwind edges have been + // revectored to NewBB. + LandingPads.insert(NewBB); + ++NumLandingPadsSplit; + Changed = true; + } + + return Changed; +} + +/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, +/// rethrowing any previously caught exception. This will crash horribly +/// at runtime if there is no such exception: using unwind to throw a new +/// exception is currently not supported. +bool DwarfEHPrepare::LowerUnwinds() { + SmallVector<TerminatorInst*, 16> UnwindInsts; + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (isa<UnwindInst>(TI)) + UnwindInsts.push_back(TI); + } + + if (UnwindInsts.empty()) return false; + + // Find the rewind function if we didn't already. 
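+  // NOTE: a sketch of the rewrite performed below (schematic; the actual
+  // callee comes from TLI->getLibcallName(RTLIB::UNWIND_RESUME)):
+  //   unwind
+  // becomes
+  //   call void @_Unwind_Resume(i8* %exn)
+  //   unreachable
+  // where %exn is the eh.exception value created at the head of the
+  // unwinding block by CreateExceptionValueCall.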
+ if (!RewindFunction) { + LLVMContext &Ctx = UnwindInsts[0]->getContext(); + std::vector<const Type*> + Params(1, Type::getInt8PtrTy(Ctx)); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), + Params, false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + bool Changed = false; + + for (SmallVectorImpl<TerminatorInst*>::iterator + I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) { + TerminatorInst *TI = *I; + + // Replace the unwind instruction with a call to _Unwind_Resume (or the + // appropriate target equivalent) followed by an UnreachableInst. + + // Create the call... + CallInst *CI = CallInst::Create(RewindFunction, + CreateExceptionValueCall(TI->getParent()), + "", TI); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + // ...followed by an UnreachableInst. + new UnreachableInst(TI->getContext(), TI); + + // Nuke the unwind instruction. + TI->eraseFromParent(); + ++NumUnwindsLowered; + Changed = true; + } + + return Changed; +} + +/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from +/// landing pads by replacing calls outside of landing pads with direct use of +/// a register holding the appropriate value; this requires adding calls inside +/// all landing pads to initialize the register. Also, move eh.exception calls +/// inside landing pads to the start of the landing pad (optional, but may make +/// things simpler for later passes). +bool DwarfEHPrepare::MoveExceptionValueCalls() { + // If the eh.exception intrinsic is not declared in the module then there is + // nothing to do. Speed up compilation by checking for this common case. + if (!ExceptionValueIntrinsic && + !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) + return false; + + bool Changed = false; + + // Move calls to eh.exception that are inside a landing pad to the start of + // the landing pad. + for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) { + BasicBlock *LP = *LI; + for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); + II != IE;) + if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { + // Found a call to eh.exception. + if (!EI->use_empty()) { + // If there is already a call to eh.exception at the start of the + // landing pad, then get hold of it; otherwise create such a call. + Value *CallAtStart = CreateExceptionValueCall(LP); + + // If the call was at the start of a landing pad then leave it alone. + if (EI == CallAtStart) + continue; + EI->replaceAllUsesWith(CallAtStart); + } + EI->eraseFromParent(); + ++NumExceptionValuesMoved; + Changed = true; + } + } + + // Look for calls to eh.exception that are not in a landing pad. If one is + // found, then a register that holds the exception value will be created in + // each landing pad, and the SSAUpdater will be used to compute the values + // returned by eh.exception calls outside of landing pads. + SSAUpdater SSA; + + // Remember where we found the eh.exception call, to avoid rescanning earlier + // basic blocks which we already know contain no eh.exception calls. + bool FoundCallOutsideLandingPad = false; + Function::iterator BB = F->begin(); + for (Function::iterator BE = F->end(); BB != BE; ++BB) { + // Skip over landing pads. 
+ if (LandingPads.count(BB)) + continue; + + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) + if (isa<EHExceptionInst>(II)) { + SSA.Initialize(II->getType(), II->getName()); + FoundCallOutsideLandingPad = true; + break; + } + + if (FoundCallOutsideLandingPad) + break; + } + + // If all calls to eh.exception are in landing pads then we are done. + if (!FoundCallOutsideLandingPad) + return Changed; + + // Add a call to eh.exception at the start of each landing pad, and tell the + // SSAUpdater that this is the value produced by the landing pad. + for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) + SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); + + // Now turn all calls to eh.exception that are not in a landing pad into a use + // of the appropriate register. + for (Function::iterator BE = F->end(); BB != BE; ++BB) { + // Skip over landing pads. + if (LandingPads.count(BB)) + continue; + + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE;) + if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { + // Found a call to eh.exception, replace it with the value from any + // upstream landing pad(s). + EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); + EI->eraseFromParent(); + ++NumExceptionValuesMoved; + } + } + + return true; +} + +/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at +/// the start of the basic block (unless there already is one, in which case +/// the existing call is returned). +Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { + Instruction *Start = BB->getFirstNonPHIOrDbg(); + // Is this a call to eh.exception? + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start)) + if (CI->getIntrinsicID() == Intrinsic::eh_exception) + // Reuse the existing call. + return Start; + + // Find the eh.exception intrinsic if we didn't already. + if (!ExceptionValueIntrinsic) + ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::eh_exception); + + // Create the call. + return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); +} + +bool DwarfEHPrepare::runOnFunction(Function &Fn) { + bool Changed = false; + + // Initialize internal state. + DT = &getAnalysis<DominatorTree>(); + F = &Fn; + + // Ensure that only unwind edges end at landing pads (a landing pad is a + // basic block where an invoke unwind edge ends). + Changed |= NormalizeLandingPads(); + + // Turn unwind instructions into libcalls. + Changed |= LowerUnwinds(); + + // TODO: Move eh.selector calls to landing pads and combine them. + + // Move eh.exception calls to landing pads. + Changed |= MoveExceptionValueCalls(); + + Changed |= HandleURoRInvokes(); + + LandingPads.clear(); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/ELF.h b/contrib/llvm/lib/CodeGen/ELF.h new file mode 100644 index 0000000..fb884c9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ELF.h @@ -0,0 +1,227 @@ +//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header contains common, non-processor-specific data structures and +// constants for the ELF file format. 
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. ELF64 is based on the HP/Intel definition of the
+// ELF-64 object file format, Version 1.5, Draft 2, May 27, 1998.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+  class GlobalValue;
+
+  /// ELFSym - This struct contains information about each symbol that is
+  /// added to the logical symbol table for the module. This is eventually
+  /// turned into a real symbol table in the file.
+  struct ELFSym {
+
+    // An ELF symbol is related to an LLVM entity either through a
+    // GlobalValue or through a named external symbol; for the other kinds
+    // (section, file, function) a null pointer is assumed by default.
+    union {
+      const GlobalValue *GV; // If this is a pointer to a GV
+      const char *Ext;       // If this is a pointer to a named symbol
+    } Source;
+
+    // Describes which source type this ELF symbol comes from; it can be a
+    // GlobalValue, an ExternalSymbol, or neither.
+    enum {
+      isGV,     // The Source.GV field is valid.
+      isExtSym, // The Source.ExtSym field is valid.
+      isOther   // Not a GlobalValue or External Symbol
+    };
+    unsigned SourceType;
+
+    bool isGlobalValue() const { return SourceType == isGV; }
+    bool isExternalSym() const { return SourceType == isExtSym; }
+
+    // getGlobalValue - If this is a global value which originated the
+    // ELF symbol, return a reference to it.
+    const GlobalValue *getGlobalValue() const {
+      assert(SourceType == isGV && "This is not a global value");
+      return Source.GV;
+    }
+
+    // getExternalSymbol - If this is an external symbol which originated the
+    // ELF symbol, return a reference to it.
+    const char *getExternalSymbol() const {
+      assert(SourceType == isExtSym && "This is not an external symbol");
+      return Source.Ext;
+    }
+
+    // getGV - From a global value, return an ELF symbol to represent it.
+    static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
+                         unsigned Type, unsigned Visibility) {
+      ELFSym *Sym = new ELFSym();
+      Sym->Source.GV = GV;
+      Sym->setBind(Bind);
+      Sym->setType(Type);
+      Sym->setVisibility(Visibility);
+      Sym->SourceType = isGV;
+      return Sym;
+    }
+
+    // getExtSym - Create and return an ELF symbol to represent an
+    // external symbol.
+    static ELFSym *getExtSym(const char *Ext) {
+      ELFSym *Sym = new ELFSym();
+      Sym->Source.Ext = Ext;
+      Sym->setBind(ELF::STB_GLOBAL);
+      Sym->setType(ELF::STT_NOTYPE);
+      Sym->setVisibility(ELF::STV_DEFAULT);
+      Sym->SourceType = isExtSym;
+      return Sym;
+    }
+
+    // getSectionSym - Returns an ELF symbol to represent an ELF section.
+    static ELFSym *getSectionSym() {
+      ELFSym *Sym = new ELFSym();
+      Sym->setBind(ELF::STB_LOCAL);
+      Sym->setType(ELF::STT_SECTION);
+      Sym->setVisibility(ELF::STV_DEFAULT);
+      Sym->SourceType = isOther;
+      return Sym;
+    }
+
+    // getFileSym - Returns an ELF symbol to represent the module identifier.
+    static ELFSym *getFileSym() {
+      ELFSym *Sym = new ELFSym();
+      Sym->setBind(ELF::STB_LOCAL);
+      Sym->setType(ELF::STT_FILE);
+      Sym->setVisibility(ELF::STV_DEFAULT);
+      Sym->SectionIdx = 0xfff1;  // ELFSection::SHN_ABS;
+      Sym->SourceType = isOther;
+      return Sym;
+    }
+
+    // getUndefGV - Returns a STT_NOTYPE symbol.
+    static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
+      ELFSym *Sym = new ELFSym();
+      Sym->Source.GV = GV;
+      Sym->setBind(Bind);
+      Sym->setType(ELF::STT_NOTYPE);
+      Sym->setVisibility(ELF::STV_DEFAULT);
+      Sym->SectionIdx = 0;  // ELFSection::SHN_UNDEF;
+      Sym->SourceType = isGV;
+      return Sym;
+    }
+
+    // ELF specific fields
+    unsigned NameIdx; // Index in .strtab of name, once emitted.
+    uint64_t Value;
+    unsigned Size;
+    uint8_t Info;
+    uint8_t Other;
+    unsigned short SectionIdx;
+
+    // Symbol index into the Symbol table
+    unsigned SymTabIdx;
+
+    ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
+               Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
+               SymTabIdx(0) {}
+
+    unsigned getBind() const { return (Info >> 4) & 0xf; }
+    unsigned getType() const { return Info & 0xf; }
+    bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
+    bool isFileType() const { return getType() == ELF::STT_FILE; }
+
+    void setBind(unsigned X) {
+      assert(X == (X & 0xF) && "Bind value out of range!");
+      Info = (Info & 0x0F) | (X << 4);
+    }
+
+    void setType(unsigned X) {
+      assert(X == (X & 0xF) && "Type value out of range!");
+      Info = (Info & 0xF0) | X;
+    }
+
+    void setVisibility(unsigned V) {
+      assert(V == (V & 0x3) && "Visibility value out of range!");
+      Other = V;
+    }
+  };
+
+  /// ELFSection - This class contains information about each section that is
+  /// emitted to the file. This is eventually turned into the section header
+  /// table at the end of the file.
+  class ELFSection : public BinaryObject {
+  public:
+    // ELF specific fields
+    unsigned NameIdx;  // sh_name - .shstrtab idx of name, once emitted.
+    unsigned Type;     // sh_type - Section contents & semantics
+    unsigned Flags;    // sh_flags - Section flags.
+    uint64_t Addr;     // sh_addr - The mem addr this section is in.
+    unsigned Offset;   // sh_offset - Offset from the file start
+    unsigned Size;     // sh_size - The section size.
+    unsigned Link;     // sh_link - Section header table index link.
+    unsigned Info;     // sh_info - Auxiliary information.
+    unsigned Align;    // sh_addralign - Alignment of section.
+    unsigned EntSize;  // sh_entsize - Size of entries in the section.
+
+    /// SectionIdx - The number of the section in the Section Table.
+    unsigned short SectionIdx;
+
+    /// Sym - The symbol to represent this section if it has one.
+    ELFSym *Sym;
+
+    /// getSymbolTableIndex - Returns the symbol table index of the symbol
+    /// representing this section.
+    unsigned getSymbolTableIndex() const {
+      assert(Sym && "section not present in the symbol table");
+      return Sym->SymTabIdx;
+    }
+
+    ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
+      : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
+        Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
+  };
+
+  /// ELFRelocation - This class contains all the information necessary to
+  /// generate any 32-bit or 64-bit ELF relocation entry.
+  class ELFRelocation {
+    uint64_t r_offset; // offset in the section of the object this applies to
+    uint32_t r_symidx; // symbol table index of the symbol to use
+    uint32_t r_type;   // machine specific relocation type
+    int64_t r_add;     // explicit relocation addend
+    bool r_rela;       // if true then the addend is part of the entry
+                       // otherwise the addend is at the location specified
+                       // by r_offset
+  public:
+    uint64_t getInfo(bool is64Bit) const {
+      if (is64Bit)
+        return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
+      else
+        return (r_symidx << 8) + (r_type & 0xFFL);
+    }
+
+    uint64_t getOffset() const { return r_offset; }
+    int64_t getAddend() const { return r_add; }
+
+    ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
+                  bool rela = true, int64_t addend = 0) :
+      r_offset(off), r_symidx(sym), r_type(type),
+      r_add(addend), r_rela(rela) {}
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp
new file mode 100644
index 0000000..3fb087c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp
@@ -0,0 +1,205 @@
+//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfce"
+
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "processing function: "
+        << MF.getFunction()->getName() << "\n");
+
+  // Get the ELF Section that this function belongs in.
+  ES = &EW.getTextSection(MF.getFunction());
+
+  // Set the desired binary object to be used by the code emitters
+  setBinaryObject(ES);
+
+  // Get the function alignment in bytes
+  unsigned Align = (1 << MF.getAlignment());
+
+  // The function must start on its required alignment
+  ES->emitAlignment(Align);
+
+  // Update the section alignment if needed.
+  ES->Align = std::max(ES->Align, Align);
+
+  // Record the function start offset
+  FnStartOff = ES->getCurrentPCOffset();
+
+  // Emit constant pool and jump tables to their appropriate sections.
+  // They need to be emitted before the function because on some targets
+  // the latter may reference a JT or CP entry address.
+  emitConstantPool(MF.getConstantPool());
+  if (MF.getJumpTableInfo())
+    emitJumpTables(MF.getJumpTableInfo());
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+  // Add a symbol to represent the function.
+  const Function *F = MF.getFunction();
+  ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
+                                EW.getGlobalELFVisibility(F));
+  FnSym->SectionIdx = ES->SectionIdx;
+  FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
+  EW.AddPendingGlobalSymbol(F, true);
+
+  // Offset from start of Section
+  FnSym->Value = FnStartOff;
+
+  if (!F->hasPrivateLinkage())
+    EW.SymbolList.push_back(FnSym);
+
+  // Patch up Jump Table Section relocations to use the real MBB offsets
+  // now that the MBB label offsets inside the function are known.
+  if (MF.getJumpTableInfo()) {
+    ELFSection &JTSection = EW.getJumpTableSection();
+    for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+         MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+      MachineRelocation &MR = *MRI;
+      uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setResultPointer((void*)MBBOffset);
+      MR.setConstantVal(ES->SectionIdx);
+      JTSection.addRelocation(MR);
+    }
+  }
+
+  // If we have emitted any relocations to function-specific objects such as
+  // basic blocks, constant pool entries, or jump tables, record their
+  // addresses now so that we can rewrite them with the correct addresses
+  // later.
+  for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = Relocations[i];
+    intptr_t Addr;
+    if (MR.isGlobalValue()) {
+      EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+    } else if (MR.isExternalSymbol()) {
+      EW.AddPendingExternalSymbol(MR.getExternalSymbol());
+    } else if (MR.isBasicBlock()) {
+      Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setConstantVal(ES->SectionIdx);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isJumpTableIndex()) {
+      ELFSection &JTSection = EW.getJumpTableSection();
+      Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+      MR.setConstantVal(JTSection.SectionIdx);
+      MR.setResultPointer((void*)Addr);
+    } else {
+      llvm_unreachable("Unhandled relocation type");
+    }
+    ES->addRelocation(MR);
+  }
+
+  // Clear per-function data structures.
+  JTRelocations.clear();
+  Relocations.clear();
+  CPLocations.clear();
+  CPSections.clear();
+  JTLocations.clear();
+  MBBLocations.clear();
+  return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant.
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // TODO: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for elf constant pools!");
+
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = CP[i];
+
+    // Record the constant pool location and the section index
+    ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+    CPLocations.push_back(CstPool.size());
+    CPSections.push_back(CstPool.SectionIdx);
+
+    if (CPE.isMachineConstantPoolEntry())
+      assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
+
+    // Emit the constant to constant pool section
+    EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
+  }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // FIXME: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for elf jump tables!");
+
+  const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+  unsigned EntrySize = 4; //MJTI->getEntrySize();
+
+  // Get the ELF Section to emit the jump table
+  ELFSection &JTSection = EW.getJumpTableSection();
+
+  // For each JT, record its offset from the start of the section
+  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+    const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+
+    // Record JT 'i' offset in the JT section
+    JTLocations.push_back(JTSection.size());
+
+    // Each MBB entry in the Jump table section has a relocation entry
+    // against the current text section.
+    for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+      unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
+      MachineRelocation MR =
+        MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
+
+      // Add the relocation to the Jump Table section
+      JTRelocations.push_back(MR);
+
+      // Output placeholder for MBB in the JT section
+      for (unsigned s=0; s < EntrySize; ++s)
+        JTSection.emitByte(0);
+    }
+  }
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h
new file mode 100644
index 0000000..2ec1f6e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h
@@ -0,0 +1,78 @@
+//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFCODEEMITTER_H
+#define ELFCODEEMITTER_H
+
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+  class ELFWriter;
+  class ELFSection;
+
+  /// ELFCodeEmitter - This class is used by the ELFWriter to
+  /// emit the code for functions to the ELF file.
+  class ELFCodeEmitter : public ObjectCodeEmitter {
+    ELFWriter &EW;
+
+    /// Target machine description
+    TargetMachine &TM;
+
+    /// Section containing code for functions
+    ELFSection *ES;
+
+    /// Relocations - Record relocations needed by the current function
+    std::vector<MachineRelocation> Relocations;
+
+    /// JTRelocations - Record relocations needed by the relocation
+    /// section.
+    std::vector<MachineRelocation> JTRelocations;
+
+    /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+    uintptr_t FnStartOff;
+  public:
+    explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+
+    /// addRelocation - Register new relocations for this function
+    void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    /// emitConstantPool - For each constant pool entry, figure out which
+    /// section the constant should live in and emit data to it
+    void emitConstantPool(MachineConstantPool *MCP);
+
+    /// emitJumpTables - Emit all the jump tables for a given jump table
+    /// info and record them to the appropriate section.
+    void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+    void startFunction(MachineFunction &F);
+    bool finishFunction(MachineFunction &F);
+
+    /// emitLabel - Emits a label
+    virtual void emitLabel(MCSymbol *Label) {
+      assert(0 && "emitLabel not implemented");
+    }
+
+    /// getLabelAddress - Return the address of the specified LabelID,
+    /// only usable after the LabelID has been emitted.
+    virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+      assert(0 && "getLabelAddress not implemented");
+      return 0;
+    }
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
+
+}; // end class ELFCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..d14728d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,1090 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+//  #1. ELF Header
+//  #2. '.text' section
+//  #3. '.data' section
+//  #4. '.bss' section (conceptual position in file)
+//  ...
+//  #X. '.shstrtab' section
+//  #Y. Section Table
+//
+// The entries in the section table are laid out as:
+//  #0. Null entry [required]
+//  #1. ".text" entry - the program code
+//  #2. ".data" entry - global variables with initializers. [ if needed ]
+//  #3. ".bss" entry - global variables without initializers. [ if needed ]
+//  ...
+//  #N. ".shstrtab" entry - String table for the section names.
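+//
+// (Illustrative note: an object laid out this way can be inspected with
+// standard binutils, e.g. 'readelf -S foo.o' for the section table and
+// 'readelf -r foo.o' for the relocation sections emitted below.)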
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfwriter"
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+//                          ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+  : MachineFunctionPass(ID), O(o), TM(tm),
+    OutContext(*new MCContext(*TM.getMCAsmInfo())),
+    TLOF(TM.getTargetLowering()->getObjFileLowering()),
+    is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+    isLittleEndian(TM.getTargetData()->isLittleEndian()),
+    ElfHdr(isLittleEndian, is64Bit) {
+
+  MAI = TM.getMCAsmInfo();
+  TEW = TM.getELFWriterInfo();
+
+  // Create the object code emitter object for this target.
+  ElfCE = new ELFCodeEmitter(*this);
+
+  // Initial number of sections
+  NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+  delete ElfCE;
+  delete &OutContext;
+
+  while(!SymbolList.empty()) {
+    delete SymbolList.back();
+    SymbolList.pop_back();
+  }
+
+  while(!PrivateSyms.empty()) {
+    delete PrivateSyms.back();
+    PrivateSyms.pop_back();
+  }
+
+  while(!SectionList.empty()) {
+    delete SectionList.back();
+    SectionList.pop_back();
+  }
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+  // Initialize TargetLoweringObjectFile.
+  const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
+  Mang = new Mangler(OutContext, *TM.getTargetData());
+
+  // ELF Header
+  // ----------
+  // Fields e_shnum and e_shstrndx are only known after all sections have
+  // been emitted. Their locations in the output buffer are recorded so
+  // they can be patched up later.
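+  //
+  // (Illustrative sketch of the back-patching pattern used here: the offset
+  // of a placeholder is remembered at emission time and fixed up once the
+  // value is known, e.g.
+  //   ELFHdr_e_shnum_Offset = ElfHdr.size();
+  //   ElfHdr.emitWord16(0);                               // placeholder
+  //   ...
+  //   ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+  // as done in EmitSectionTableStringTable() and
+  // OutputSectionsAndSectionTable() below.)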
+  //
+  // Note
+  // ----
+  // The emitWord method behaves differently for ELF32 and ELF64, writing
+  // 4 bytes in the former and 8 in the latter for the *_off and *_addr ELF
+  // types.
+
+  ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+  ElfHdr.emitByte('E');  // e_ident[EI_MAG1]
+  ElfHdr.emitByte('L');  // e_ident[EI_MAG2]
+  ElfHdr.emitByte('F');  // e_ident[EI_MAG3]
+
+  ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+  ElfHdr.emitByte(TEW->getEIData());  // e_ident[EI_DATA]
+  ElfHdr.emitByte(ELF::EV_CURRENT);   // e_ident[EI_VERSION]
+  ElfHdr.emitAlignment(16);           // e_ident[EI_NIDENT-EI_PAD]
+
+  ElfHdr.emitWord16(ELF::ET_REL);        // e_type
+  ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+  ElfHdr.emitWord32(ELF::EV_CURRENT);    // e_version
+  ElfHdr.emitWord(0);                    // e_entry, no entry point in .o file
+  ElfHdr.emitWord(0);                    // e_phoff, no program header for .o
+  ELFHdr_e_shoff_Offset = ElfHdr.size();
+  ElfHdr.emitWord(0);                    // e_shoff = sec hdr table off in bytes
+  ElfHdr.emitWord32(TEW->getEFlags());   // e_flags = whatever the target wants
+  ElfHdr.emitWord16(TEW->getHdrSize());  // e_ehsize = ELF header size
+  ElfHdr.emitWord16(0);                  // e_phentsize = prog header entry size
+  ElfHdr.emitWord16(0);                  // e_phnum = # prog header entries = 0
+
+  // e_shentsize = Section header entry size
+  ElfHdr.emitWord16(TEW->getSHdrSize());
+
+  // e_shnum = # of section header ents
+  ELFHdr_e_shnum_Offset = ElfHdr.size();
+  ElfHdr.emitWord16(0); // Placeholder
+
+  // e_shstrndx = Section # of '.shstrtab'
+  ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+  ElfHdr.emitWord16(0); // Placeholder
+
+  // Add the null section, which is required to be first in the file.
+  getNullSection();
+
+  // The first entry in the symtab is the null symbol and the second
+  // is a local symbol containing the module/file name.
+  SymbolList.push_back(new ELFSym());
+  SymbolList.push_back(ELFSym::getFileSym());
+
+  return false;
+}
+
+// AddPendingGlobalSymbol - Add a global to be processed and to
+// the global symbol lookup, use a zero index because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+                                       bool AddToLookup /* = false */) {
+  PendingGlobals.insert(GV);
+  if (AddToLookup)
+    GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external to be processed
+// and to the external symbol lookup, use a zero index because
+// the symbol table index will be determined later.
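+// (Hedged observation, not a documented contract: the lookup is keyed by
+// the 'const char *' pointer itself, so it relies on callers passing stable,
+// uniqued strings -- the same assumption MachineRelocation appears to make
+// for external symbols.)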
+void ELFWriter::AddPendingExternalSymbol(const char *External) { + PendingExternals.insert(External); + ExtSymLookup[External] = 0; +} + +ELFSection &ELFWriter::getDataSection() { + const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); + return getSection(Data->getSectionName(), Data->getType(), + Data->getFlags(), 4); +} + +ELFSection &ELFWriter::getBSSSection() { + const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); + return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); +} + +// getCtorSection - Get the static constructor section +ELFSection &ELFWriter::getCtorSection() { + const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); + return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); +} + +// getDtorSection - Get the static destructor section +ELFSection &ELFWriter::getDtorSection() { + const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); + return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); +} + +// getTextSection - Get the text section for the specified function +ELFSection &ELFWriter::getTextSection(const Function *F) { + const MCSectionELF *Text = + (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); + return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); +} + +// getJumpTableSection - Get a read only section for constants when +// emitting jump tables. TODO: add PIC support +ELFSection &ELFWriter::getJumpTableSection() { + const MCSectionELF *JT = + (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); + return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), + TM.getTargetData()->getPointerABIAlignment()); +} + +// getConstantPoolSection - Get a constant pool section based on the machine +// constant pool entry type and relocation info. +ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16(); break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSectionELF *CPSect = + (const MCSectionELF *)TLOF.getSectionForConstant(Kind); + return getSection(CPSect->getSectionName(), CPSect->getType(), + CPSect->getFlags(), CPE.getAlignment()); +} + +// getRelocSection - Return the relocation section of section 'S'. 'RelA' +// is true if the relocation section contains entries with addends. +ELFSection &ELFWriter::getRelocSection(ELFSection &S) { + unsigned SectionType = TEW->hasRelocationAddend() ? 
+                            ELF::SHT_RELA : ELF::SHT_REL;
+
+  std::string SectionName(".rel");
+  if (TEW->hasRelocationAddend())
+    SectionName.append("a");
+  SectionName.append(S.getName());
+
+  return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
+// getGlobalELFVisibility - Returns the ELF specific visibility type
+unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
+  switch (GV->getVisibility()) {
+  default:
+    llvm_unreachable("unknown visibility type");
+  case GlobalValue::DefaultVisibility:
+    return ELF::STV_DEFAULT;
+  case GlobalValue::HiddenVisibility:
+    return ELF::STV_HIDDEN;
+  case GlobalValue::ProtectedVisibility:
+    return ELF::STV_PROTECTED;
+  }
+  return 0;
+}
+
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
+  if (GV->hasInternalLinkage())
+    return ELF::STB_LOCAL;
+
+  if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
+    return ELF::STB_WEAK;
+
+  return ELF::STB_GLOBAL;
+}
+
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+  if (GV->isDeclaration())
+    return ELF::STT_NOTYPE;
+
+  if (isa<Function>(GV))
+    return ELF::STT_FUNC;
+
+  return ELF::STT_OBJECT;
+}
+
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+// no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+  return GV->isDeclaration() || (isa<Function>(GV));
+}
+
+// AddToSymbolList - Update the symbol lookup and, if the symbol is
+// private, add it to the PrivateSyms list, otherwise to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+  assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+  const GlobalValue *GV = GblSym->getGlobalValue();
+  if (GV->hasPrivateLinkage()) {
+    // For private symbols, keep track of the index inside
+    // the private list since it will never go to the symbol
+    // table and won't be patched up later.
+    PrivateSyms.push_back(GblSym);
+    GblSymLookup[GV] = PrivateSyms.size()-1;
+  } else {
+    // Non-private symbols are left with zero indices until
+    // they are patched up during symbol table emission
+    // (where the indices are created).
+    SymbolList.push_back(GblSym);
+    GblSymLookup[GV] = 0;
+  }
+}
+
+// EmitGlobal - Choose the right section for the global and emit it
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
+
+  // Check if the referenced symbol is already emitted
+  if (GblSymLookup.find(GV) != GblSymLookup.end())
+    return;
+
+  // Handle ELF Bind, Visibility and Type for the current symbol
+  unsigned SymBind = getGlobalELFBinding(GV);
+  unsigned SymType = getGlobalELFType(GV);
+  bool IsUndefSym = IsELFUndefSym(GV);
+
+  ELFSym *GblSym = IsUndefSym ?
ELFSym::getUndefGV(GV, SymBind)
+    : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+  if (!IsUndefSym) {
+    assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+    const GlobalVariable *GVar = cast<GlobalVariable>(GV);
+
+    // Handle special llvm globals
+    if (EmitSpecialLLVMGlobal(GVar))
+      return;
+
+    // Get the ELF section where this global belongs from TLOF
+    const MCSectionELF *S =
+      (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+    ELFSection &ES =
+      getSection(S->getSectionName(), S->getType(), S->getFlags());
+    SectionKind Kind = S->getKind();
+
+    // The symbol's alignment should update the section alignment if needed.
+    const TargetData *TD = TM.getTargetData();
+    unsigned Align = TD->getPreferredAlignment(GVar);
+    unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+    GblSym->Size = Size;
+
+    if (S->HasCommonSymbols()) { // Symbol must go to a common section
+      GblSym->SectionIdx = ELF::SHN_COMMON;
+
+      // A new linkonce section is created for each global in the
+      // common section; the default alignment is 1 and the symbol
+      // value contains its alignment.
+      ES.Align = 1;
+      GblSym->Value = Align;
+
+    } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+      GblSym->SectionIdx = ES.SectionIdx;
+
+      // Round the section size up to the symbol's alignment so the
+      // next object starts at the right offset in the section.
+      if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+      ES.Align = std::max(ES.Align, Align);
+
+      // GblSym->Value should contain the virtual offset inside the section.
+      // Virtual because the BSS space is not allocated on ELF objects.
+      GblSym->Value = ES.Size;
+      ES.Size += Size;
+
+    } else { // The symbol must go to some kind of data section
+      GblSym->SectionIdx = ES.SectionIdx;
+
+      // GblSym->Value should contain the symbol offset inside the section,
+      // and all symbols should start on their required alignment boundary.
+      ES.Align = std::max(ES.Align, Align);
+      ES.emitAlignment(Align);
+      GblSym->Value = ES.size();
+
+      // Emit the global to the data section 'ES'
+      EmitGlobalConstant(GVar->getInitializer(), ES);
+    }
+  }
+
+  AddToSymbolList(GblSym);
+}
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                         ELFSection &GblS) {
+
+  // Print the fields in successive locations. Pad to align if needed!
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CVS->getType());
+  const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+  uint64_t sizeSoFar = 0;
+  for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+    const Constant* field = CVS->getOperand(i);
+
+    // Check if padding is needed and insert one or more 0s.
+    uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+    uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+                        - cvsLayout->getElementOffset(i)) - fieldSize;
+    sizeSoFar += fieldSize + padSize;
+
+    // Now print the actual field value.
+    EmitGlobalConstant(field, GblS);
+
+    // Insert padding - this may include padding to increase the size of the
+    // current field up to the ABI size (if the struct is not packed) as well
+    // as padding to ensure that the next field starts at the right offset.
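+    // (Worked example: for the struct '{ i8, i32 }' with a 4-byte aligned
+    // i32, the element offsets are 0 and 4, so the i8 field is followed by
+    // padSize = (4 - 0) - 1 = 3 zero bytes before the i32 is emitted.)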
+    GblS.emitZeros(padSize);
+  }
+  assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+         "Layout of constant struct may be incorrect!");
+}
+
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CV->getType());
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+    for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+      EmitGlobalConstant(CVA->getOperand(i), GblS);
+    return;
+  } else if (isa<ConstantAggregateZero>(CV)) {
+    GblS.emitZeros(Size);
+    return;
+  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+    EmitGlobalConstantStruct(CVS, GblS);
+    return;
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    APInt Val = CFP->getValueAPF().bitcastToAPInt();
+    if (CFP->getType()->isDoubleTy())
+      GblS.emitWord64(Val.getZExtValue());
+    else if (CFP->getType()->isFloatTy())
+      GblS.emitWord32(Val.getZExtValue());
+    else if (CFP->getType()->isX86_FP80Ty()) {
+      unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+                         TD->getTypeStoreSize(CFP->getType());
+      GblS.emitWordFP80(Val.getRawData(), PadSize);
+    } else if (CFP->getType()->isPPC_FP128Ty())
+      llvm_unreachable("PPC_FP128Ty global emission not implemented");
+    return;
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    if (Size == 1)
+      GblS.emitByte(CI->getZExtValue());
+    else if (Size == 2)
+      GblS.emitWord16(CI->getZExtValue());
+    else if (Size == 4)
+      GblS.emitWord32(CI->getZExtValue());
+    else
+      EmitGlobalConstantLargeInt(CI, GblS);
+    return;
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+    const VectorType *PTy = CP->getType();
+    for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+      EmitGlobalConstant(CP->getOperand(I), GblS);
+    return;
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    // Resolve a constant expression which returns a (Constant, Offset)
+    // pair. If 'Res.first' is a GlobalValue, emit a relocation with
+    // the offset 'Res.second', otherwise emit the global constant as is
+    // always done for non-constant-expression types.
+    CstExprResTy Res = ResolveConstantExpr(CE);
+    const Constant *Op = Res.first;
+
+    if (isa<GlobalValue>(Op))
+      EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
+                               TD->getTypeAllocSize(Op->getType()),
+                               GblS, Res.second);
+    else
+      EmitGlobalConstant(Op, GblS);
+
+    return;
+  } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+    // Fill the data entry with zeros or emit a relocation entry
+    if (isa<ConstantPointerNull>(CV))
+      GblS.emitZeros(Size);
+    else
+      EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
+                               Size, GblS);
+    return;
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+    // This is a constant address for a global variable or function and
+    // therefore must be referenced using a relocation entry.
+    EmitGlobalDataRelocation(GV, Size, GblS);
+    return;
+  }
+
+  std::string msg;
+  raw_string_ostream ErrorMsg(msg);
+  ErrorMsg << "Constant unimp for type: " << *CV->getType();
+  report_fatal_error(ErrorMsg.str());
+}
+
+// ResolveConstantExpr - Resolve the constant expression until it stops
+// yielding other constant expressions.
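+// (Illustrative example: for a constant such as
+//    getelementptr ([4 x i32]* @arr, i32 0, i32 2)
+// the result is the pair (@arr, 8) given 4-byte i32s, letting the caller
+// emit a single relocation against @arr with addend 8.)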
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+  const TargetData *TD = TM.getTargetData();
+
+  // There is no constant expression nested inside another anymore.
+  if (!isa<ConstantExpr>(CV))
+    return std::make_pair(CV, 0);
+
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+  switch (CE->getOpcode()) {
+  case Instruction::BitCast:
+    return ResolveConstantExpr(CE->getOperand(0));
+
+  case Instruction::GetElementPtr: {
+    const Constant *ptrVal = CE->getOperand(0);
+    SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+    int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+                                          idxVec.size());
+    return std::make_pair(ptrVal, Offset);
+  }
+  case Instruction::IntToPtr: {
+    Constant *Op = CE->getOperand(0);
+    Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+                                      false/*ZExt*/);
+    return ResolveConstantExpr(Op);
+  }
+  case Instruction::PtrToInt: {
+    Constant *Op = CE->getOperand(0);
+    const Type *Ty = CE->getType();
+
+    // We can emit the pointer value into this slot if the slot is an
+    // integer slot equal to the size of the pointer.
+    if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+      return ResolveConstantExpr(Op);
+
+    llvm_unreachable("Integer size less than pointer size");
+  }
+  case Instruction::Add:
+  case Instruction::Sub: {
+    // Only handle cases where there's a constant expression with GlobalValue
+    // as first operand and ConstantInt as second, which are the cases we can
+    // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+    //  1)  Instruction::Add  => (global) + CstInt
+    //  2)  Instruction::Sub  => (global) + -CstInt
+    const Constant *Op0 = CE->getOperand(0);
+    const Constant *Op1 = CE->getOperand(1);
+    assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+    CstExprResTy Res = ResolveConstantExpr(Op0);
+    assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+    const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+    switch (CE->getOpcode()) {
+    case Instruction::Add:
+      return std::make_pair(Res.first, RHS.getSExtValue());
+    case Instruction::Sub:
+      return std::make_pair(Res.first, (-RHS).getSExtValue());
+    }
+  }
+  }
+
+  report_fatal_error(CE->getOpcodeName() +
+                     StringRef(": Unsupported ConstantExpr type"));
+
+  return std::make_pair(CV, 0); // silence warning
+}
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+                                         ELFSection &GblS, int64_t Offset) {
+  // Create the relocation entry for the global value
+  MachineRelocation MR =
+    MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+                             TEW->getAbsoluteLabelMachineRelTy(),
+                             const_cast<GlobalValue*>(GV),
+                             Offset);
+
+  // Fill the data entry with zeros
+  GblS.emitZeros(Size);
+
+  // Add the relocation entry for the current data section
+  GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+                                           ELFSection &S) {
+  const TargetData *TD = TM.getTargetData();
+  unsigned BitWidth = CI->getBitWidth();
+  assert(isPowerOf2_32(BitWidth) &&
+         "Non-power-of-2-sized integers not handled!");
+
+  const uint64_t *RawData = CI->getValue().getRawData();
+  uint64_t Val = 0;
+  for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+    Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+    S.emitWord64(Val);
+  }
+}
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
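+// (Background, hedged: 'llvm.global_ctors' and 'llvm.global_dtors' are
+// appending globals of type [N x { i32, void ()* }]; the i32 priority is
+// ignored here and only the function pointers are emitted -- see
+// EmitXXStructorList() below.)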
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") + llvm_unreachable("not implemented yet"); + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. + if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPointerPrefAlignment(); + if (GV->getName() == "llvm.global_ctors") { + ELFSection &Ctor = getCtorSection(); + Ctor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Ctor); + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + ELFSection &Dtor = getDtorSection(); + Dtor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Dtor); + return true; + } + + return false; +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the +/// function pointers, ignoring the init priority. +void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa<ConstantArray>(List)) return; + ConstantArray *InitList = cast<ConstantArray>(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } +} + +bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the ElfCE object above. + return false; +} + +/// doFinalization - Now that the module has been completely processed, emit +/// the ELF file to 'O'. +bool ELFWriter::doFinalization(Module &M) { + // Emit .data section placeholder + getDataSection(); + + // Emit .bss section placeholder + getBSSSection(); + + // Build and emit data, bss and "common" sections. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit all pending globals + for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); + I != E; ++I) + EmitGlobal(*I); + + // Emit all pending externals + for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); + I != E; ++I) + SymbolList.push_back(ELFSym::getExtSym(*I)); + + // Emit a symbol for each section created until now, skip null section + for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + ELFSym *SectionSym = ELFSym::getSectionSym(); + SectionSym->SectionIdx = ES.SectionIdx; + SymbolList.push_back(SectionSym); + ES.Sym = SymbolList.back(); + } + + // Emit string table + EmitStringTable(M.getModuleIdentifier()); + + // Emit the symbol table now, if non-empty. + EmitSymbolTable(); + + // Emit the relocation sections. + EmitRelocations(); + + // Emit the sections string table. + EmitSectionTableStringTable(); + + // Dump the sections and section table to the .o file. 
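+  // (Note on ordering: EmitStringTable() must run before EmitSymbolTable()
+  // so every ELFSym::NameIdx is already known, and EmitRelocations() must
+  // run after the symbol table is emitted so that GblSymLookup/ExtSymLookup
+  // hold final symbol table indices.)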
+  OutputSectionsAndSectionTable();
+
+  return false;
+}
+
+// RelocateField - Patch relocatable field with 'Offset' in 'BO'
+// using a 'Value' of known 'Size'
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+                              int64_t Value, unsigned Size) {
+  if (Size == 32)
+    BO.fixWord32(Value, Offset);
+  else if (Size == 64)
+    BO.fixWord64(Value, Offset);
+  else
+    llvm_unreachable("don't know how to patch relocatable field");
+}
+
+/// EmitRelocations - Emit relocations
+void ELFWriter::EmitRelocations() {
+
+  // True if the target uses the relocation entry to hold the addend,
+  // otherwise the addend is written directly to the relocatable field.
+  bool HasRelA = TEW->hasRelocationAddend();
+
+  // Create Relocation sections for each section which needs it.
+  for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+    ELFSection &S = *SectionList[i];
+
+    // This section does not have relocations
+    if (!S.hasRelocations()) continue;
+    ELFSection &RelSec = getRelocSection(S);
+
+    // 'Link' - Section hdr idx of the associated symbol table
+    // 'Info' - Section hdr idx of the section to which the relocation applies
+    ELFSection &SymTab = getSymbolTableSection();
+    RelSec.Link = SymTab.SectionIdx;
+    RelSec.Info = S.SectionIdx;
+    RelSec.EntSize = TEW->getRelocationEntrySize();
+
+    // Get the relocations from the section
+    std::vector<MachineRelocation> Relos = S.getRelocations();
+    for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
+         MRE = Relos.end(); MRI != MRE; ++MRI) {
+      MachineRelocation &MR = *MRI;
+
+      // Relocatable field offset from the section start
+      unsigned RelOffset = MR.getMachineCodeOffset();
+
+      // Symbol index in the symbol table
+      unsigned SymIdx = 0;
+
+      // Target specific relocation field type and size
+      unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
+      unsigned RelTySize = TEW->getRelocationTySize(RelType);
+      int64_t Addend = 0;
+
+      // There are several machine relocation types, and each one of
+      // them needs a different approach to retrieve the symbol table index.
+      if (MR.isGlobalValue()) {
+        const GlobalValue *G = MR.getGlobalValue();
+        int64_t GlobalOffset = MR.getConstantVal();
+        SymIdx = GblSymLookup[G];
+        if (G->hasPrivateLinkage()) {
+          // If the target uses a section offset in the relocation:
+          // SymIdx + Addend = section sym for global + section offset
+          unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+          Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+          SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+        } else {
+          Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+        }
+      } else if (MR.isExternalSymbol()) {
+        const char *ExtSym = MR.getExternalSymbol();
+        SymIdx = ExtSymLookup[ExtSym];
+        Addend = TEW->getDefaultAddendForRelTy(RelType);
+      } else {
+        // Get the symbol index for the section symbol
+        unsigned SectionIdx = MR.getConstantVal();
+        SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+        // The symbol offset inside the section
+        int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+        // For pc relative relocations where symbols are defined in the same
+        // section they are referenced, ignore the relocation entry and patch
+        // the relocatable field with the symbol offset directly.
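+        // (Worked example: a pc-relative reference at offset 0x10 to a label
+        // at offset 0x40 of the same section needs no symbol at all -- the
+        // field is patched with computeRelocation(0x40, 0x10, RelType) and
+        // no entry is written to the .rel/.rela section.)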
+        if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
+          int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
+          RelocateField(S, RelOffset, Value, RelTySize);
+          continue;
+        }
+
+        Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
+      }
+
+      // On targets that do not keep the addend in the relocation entry, the
+      // addend must be patched into the relocatable field itself; otherwise
+      // write zeros to make sure there is no garbage there.
+      RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
+
+      // Get the relocation entry and emit to the relocation section
+      ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
+      EmitRelocation(RelSec, Rel, HasRelA);
+    }
+  }
+}
+
+/// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
+void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
+                               bool HasRelA) {
+  RelSec.emitWord(Rel.getOffset());
+  RelSec.emitWord(Rel.getInfo(is64Bit));
+  if (HasRelA)
+    RelSec.emitWord(Rel.getAddend());
+}
+
+/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
+void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
+  if (is64Bit) {
+    SymbolTable.emitWord32(Sym.NameIdx);
+    SymbolTable.emitByte(Sym.Info);
+    SymbolTable.emitByte(Sym.Other);
+    SymbolTable.emitWord16(Sym.SectionIdx);
+    SymbolTable.emitWord64(Sym.Value);
+    SymbolTable.emitWord64(Sym.Size);
+  } else {
+    SymbolTable.emitWord32(Sym.NameIdx);
+    SymbolTable.emitWord32(Sym.Value);
+    SymbolTable.emitWord32(Sym.Size);
+    SymbolTable.emitByte(Sym.Info);
+    SymbolTable.emitByte(Sym.Other);
+    SymbolTable.emitWord16(Sym.SectionIdx);
+  }
+}
+
+/// EmitSectionHeader - Write section 'SHdr' header in 'SHdrTab'
+/// Section Header Table
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+                                  const ELFSection &SHdr) {
+  SHdrTab.emitWord32(SHdr.NameIdx);
+  SHdrTab.emitWord32(SHdr.Type);
+  if (is64Bit) {
+    SHdrTab.emitWord64(SHdr.Flags);
+    SHdrTab.emitWord(SHdr.Addr);
+    SHdrTab.emitWord(SHdr.Offset);
+    SHdrTab.emitWord64(SHdr.Size);
+    SHdrTab.emitWord32(SHdr.Link);
+    SHdrTab.emitWord32(SHdr.Info);
+    SHdrTab.emitWord64(SHdr.Align);
+    SHdrTab.emitWord64(SHdr.EntSize);
+  } else {
+    SHdrTab.emitWord32(SHdr.Flags);
+    SHdrTab.emitWord(SHdr.Addr);
+    SHdrTab.emitWord(SHdr.Offset);
+    SHdrTab.emitWord32(SHdr.Size);
+    SHdrTab.emitWord32(SHdr.Link);
+    SHdrTab.emitWord32(SHdr.Info);
+    SHdrTab.emitWord32(SHdr.Align);
+    SHdrTab.emitWord32(SHdr.EntSize);
+  }
+}
+
+/// EmitStringTable - If the current symbol table is non-empty, emit the string
+/// table for it
+void ELFWriter::EmitStringTable(const std::string &ModuleName) {
+  if (!SymbolList.size()) return; // Empty symbol table.
+  ELFSection &StrTab = getStringTableSection();
+
+  // Set the zero'th symbol to a null byte, as required.
+  StrTab.emitByte(0);
+
+  // Walk the symbol list and write symbol names into the string table.
+  unsigned Index = 1;
+  for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+    ELFSym &Sym = *(*I);
+
+    std::string Name;
+    if (Sym.isGlobalValue()) {
+      SmallString<40> NameStr;
+      Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
+      Name.append(NameStr.begin(), NameStr.end());
+    } else if (Sym.isExternalSym())
+      Name.append(Sym.getExternalSymbol());
+    else if (Sym.isFileType())
+      Name.append(ModuleName);
+
+    if (Name.empty()) {
+      Sym.NameIdx = 0;
+    } else {
+      Sym.NameIdx = Index;
+      StrTab.emitString(Name);
+
+      // Keep track of the number of bytes emitted to this section.
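+      // (Illustrative layout: a table holding "foo" then "bar" is the byte
+      // sequence \0 f o o \0 b a r \0, so "foo" gets NameIdx 1 and "bar"
+      // gets NameIdx 5 -- hence the Name.size()+1 below for the NUL.)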
+      Index += Name.size()+1;
+    }
+  }
+  assert(Index == StrTab.size());
+  StrTab.Size = Index;
+}
+
+// SortSymbols - In the symbol table, local symbols must come before
+// all symbols with non-local bindings. The return value is
+// the position of the first non-local symbol.
+unsigned ELFWriter::SortSymbols() {
+  unsigned FirstNonLocalSymbol;
+  std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+  for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+    if ((*I)->isLocalBind())
+      LocalSyms.push_back(*I);
+    else
+      OtherSyms.push_back(*I);
+  }
+  SymbolList.clear();
+  FirstNonLocalSymbol = LocalSyms.size();
+
+  for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+    SymbolList.push_back(LocalSyms[i]);
+
+  for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+    SymbolList.push_back(*I);
+
+  LocalSyms.clear();
+  OtherSyms.clear();
+
+  return FirstNonLocalSymbol;
+}
+
+/// EmitSymbolTable - Emit the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+  if (!SymbolList.size()) return; // Empty symbol table.
+
+  // Now that we have emitted the string table and know the offset into the
+  // string table of each symbol, emit the symbol table itself.
+  ELFSection &SymTab = getSymbolTableSection();
+  SymTab.Align = TEW->getPrefELFAlignment();
+
+  // Section Index of .strtab.
+  SymTab.Link = getStringTableSection().SectionIdx;
+
+  // Size of each symtab entry.
+  SymTab.EntSize = TEW->getSymTabEntrySize();
+
+  // Reorder the symbol table with local symbols first!
+  unsigned FirstNonLocalSymbol = SortSymbols();
+
+  // Emit all the symbols to the symbol table.
+  for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+    ELFSym &Sym = *SymbolList[i];
+
+    // Emit symbol to the symbol table
+    EmitSymbol(SymTab, Sym);
+
+    // Record the symbol table index for each symbol
+    if (Sym.isGlobalValue())
+      GblSymLookup[Sym.getGlobalValue()] = i;
+    else if (Sym.isExternalSym())
+      ExtSymLookup[Sym.getExternalSymbol()] = i;
+
+    // Keep track of the symbol index into the symbol table
+    Sym.SymTabIdx = i;
+  }
+
+  // One greater than the symbol table index of the last local symbol
+  SymTab.Info = FirstNonLocalSymbol;
+  SymTab.Size = SymTab.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+  // First step: add the section for the string table to the list of sections:
+  ELFSection &SHStrTab = getSectionHeaderStringTableSection();
+
+  // Now that we know which section number is the .shstrtab section, update the
+  // e_shstrndx entry in the ELF header.
+  ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+
+  // Set the NameIdx of each section in the string table and emit the bytes
+  // for the string table.
+  unsigned Index = 0;
+
+  for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+    ELFSection &S = *(*I);
+    // Set the index into the table. Note if we have lots of entries with
+    // common suffixes, we could memoize them here if we cared.
+    S.NameIdx = Index;
+    SHStrTab.emitString(S.getName());
+
+    // Keep track of the number of bytes emitted to this section.
+    Index += S.getName().size()+1;
+  }
+
+  // Set the size of .shstrtab now that we know what it is.
+  assert(Index == SHStrTab.size());
+  SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file
+/// header and all of the sections, emit these to the ostream destination
+/// and emit the SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+  // Pass #1: Compute the file offset for each section.
+  size_t FileOff = ElfHdr.size(); // File header first.
+
+  // Adjust the alignment of all sections if needed; skip the null section.
+  for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
+    ELFSection &ES = *SectionList[i];
+    if (!ES.size()) {
+      ES.Offset = FileOff;
+      continue;
+    }
+
+    // Update Section size
+    if (!ES.Size)
+      ES.Size = ES.size();
+
+    // Align FileOff to whatever the alignment restrictions of the section are.
+    if (ES.Align)
+      FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
+
+    ES.Offset = FileOff;
+    FileOff += ES.Size;
+  }
+
+  // Align Section Header.
+  unsigned TableAlign = TEW->getPrefELFAlignment();
+  FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+  // Now that we know where all of the sections will be emitted, set the
+  // e_shnum entry in the ELF header.
+  ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+
+  // Now that we know the offset in the file of the section table, update the
+  // e_shoff address in the ELF header.
+  ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
+
+  // Now that we know all of the data in the file header, emit it and all of
+  // the sections!
+  O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
+  FileOff = ElfHdr.size();
+
+  // Section Header Table blob
+  BinaryObject SHdrTable(isLittleEndian, is64Bit);
+
+  // Emit all of the sections to the file and build the section header table.
+  for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+    ELFSection &S = *(*I);
+    DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+                 << ", Size: " << S.Size << ", Offset: " << S.Offset
+                 << ", SectionData Size: " << S.size() << "\n");
+
+    // Align FileOff to whatever the alignment restrictions of the section are.
+    if (S.size()) {
+      if (S.Align) {
+        for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+             FileOff != NewFileOff; ++FileOff)
+          O << (char)0xAB;
+      }
+      O.write((char *)&S.getData()[0], S.Size);
+      FileOff += S.Size;
+    }
+
+    EmitSectionHeader(SHdrTable, S);
+  }
+
+  // Align output for the section table.
+  for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+       FileOff != NewFileOff; ++FileOff)
+    O << (char)0xAB;
+
+  // Emit the section table itself.
+  O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
+}
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.h b/contrib/llvm/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..b8bac55
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,251 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <map>
+
+namespace llvm {
+  class BinaryObject;
+  class Constant;
+  class ConstantInt;
+  class ConstantStruct;
+  class ELFCodeEmitter;
+  class ELFRelocation;
+  class ELFSection;
+  struct ELFSym;
+  class GlobalVariable;
+  class JITDebugRegisterer;
+  class Mangler;
+  class MachineCodeEmitter;
+  class MachineConstantPoolEntry;
+  class ObjectCodeEmitter;
+  class MCAsmInfo;
+  class TargetELFWriterInfo;
+  class TargetLoweringObjectFile;
+  class raw_ostream;
+  class SectionKind;
+  class MCContext;
+  class TargetMachine;
+
+  typedef std::vector<ELFSym*>::iterator ELFSymIter;
+  typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+  typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+  typedef SetVector<const char *>::const_iterator PendingExtsIter;
+  typedef std::pair<const Constant *, int64_t> CstExprResTy;
+
+  /// ELFWriter - This class implements the common target-independent code for
+  /// writing ELF files. Targets should derive a class from this to
+  /// parameterize the output format.
+  ///
+  class ELFWriter : public MachineFunctionPass {
+    friend class ELFCodeEmitter;
+    friend class JITDebugRegisterer;
+  public:
+    static char ID;
+
+    /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+    ObjectCodeEmitter *getObjectCodeEmitter() {
+      return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
+    }
+
+    ELFWriter(raw_ostream &O, TargetMachine &TM);
+    ~ELFWriter();
+
+  protected:
+    /// Output stream to send the resultant object file to.
+    raw_ostream &O;
+
+    /// Target machine description.
+    TargetMachine &TM;
+
+    /// Context object for machine code objects.
+    MCContext &OutContext;
+
+    /// Target Elf Writer description.
+    const TargetELFWriterInfo *TEW;
+
+    /// Mang - The object used to perform name mangling for this module.
+    Mangler *Mang;
+
+    /// ElfCE - The ELFCodeEmitter object that we are exposing to emit machine
+    /// code for functions to the .o file.
+    ELFCodeEmitter *ElfCE;
+
+    /// TLOF - Target Lowering Object File, provides section names for globals
+    /// and other object file specific stuff.
+    const TargetLoweringObjectFile &TLOF;
+
+    /// MAI - Target Asm Info, provides information about section names for
+    /// globals and other target specific stuff.
+    const MCAsmInfo *MAI;
+
+    //===------------------------------------------------------------------===//
+    // Properties inferred automatically from the target machine.
+    //===------------------------------------------------------------------===//
+
+    /// is64Bit/isLittleEndian - This information is inferred from the target
+    /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+    bool is64Bit, isLittleEndian;
+
+    /// doInitialization - Emit the file header and all of the global variables
+    /// for the module to the ELF file.
+    bool doInitialization(Module &M);
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    /// doFinalization - Now that the module has been completely processed,
+    /// emit the ELF file to 'O'.
+    bool doFinalization(Module &M);
+
+  private:
+    /// Blob containing the Elf header
+    BinaryObject ElfHdr;
+
+    /// SectionList - This is the list of sections that we have emitted to the
+    /// file. Once the file has been completely built, the section header table
+    /// is constructed from this info.
+ std::vector<ELFSection*> SectionList; + unsigned NumSections; // Always = SectionList.size() + + /// SectionLookup - This is a mapping from section name to section number in + /// the SectionList. Used to quickly gather the Section Index from MAI names + std::map<std::string, ELFSection*> SectionLookup; + + /// PendingGlobals - Globals not processed as symbols yet. + SetVector<const GlobalValue*> PendingGlobals; + + /// GblSymLookup - This is a mapping from global value to a symbol index + /// in the symbol table or private symbols list. This is useful since reloc + /// symbol references must be quickly mapped to their indices on the lists. + std::map<const GlobalValue*, uint32_t> GblSymLookup; + + /// PendingExternals - Externals not processed as symbols yet. + SetVector<const char *> PendingExternals; + + /// ExtSymLookup - This is a mapping from externals to a symbol index + /// in the symbol table list. This is useful since reloc symbol references + /// must be quickly mapped to their symbol table indices. + std::map<const char *, uint32_t> ExtSymLookup; + + /// SymbolList - This is the list of symbols emitted to the symbol table. + /// When the SymbolList is finally built, local symbols must be placed in + /// the beginning while non-locals at the end. + std::vector<ELFSym*> SymbolList; + + /// PrivateSyms - Record private symbols, every symbol here must never be + /// present in the SymbolList. + std::vector<ELFSym*> PrivateSyms; + + /// getSection - Return the section with the specified name, creating a new + /// section if one does not already exist. + ELFSection &getSection(const std::string &Name, unsigned Type, + unsigned Flags = 0, unsigned Align = 0) { + ELFSection *&SN = SectionLookup[Name]; + if (SN) return *SN; + + SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); + SN = SectionList.back(); + SN->SectionIdx = NumSections++; + SN->Type = Type; + SN->Flags = Flags; + SN->Link = ELF::SHN_UNDEF; + SN->Align = Align; + return *SN; + } + + ELFSection &getNonExecStackSection() { + return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1); + } + + ELFSection &getSymbolTableSection() { + return getSection(".symtab", ELF::SHT_SYMTAB, 0); + } + + ELFSection &getStringTableSection() { + return getSection(".strtab", ELF::SHT_STRTAB, 0, 1); + } + + ELFSection &getSectionHeaderStringTableSection() { + return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1); + } + + ELFSection &getNullSection() { + return getSection("", ELF::SHT_NULL, 0); + } + + ELFSection &getDataSection(); + ELFSection &getBSSSection(); + ELFSection &getCtorSection(); + ELFSection &getDtorSection(); + ELFSection &getJumpTableSection(); + ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); + ELFSection &getTextSection(const Function *F); + ELFSection &getRelocSection(ELFSection &S); + + // Helpers for obtaining ELF specific info. + unsigned getGlobalELFBinding(const GlobalValue *GV); + unsigned getGlobalELFType(const GlobalValue *GV); + unsigned getGlobalELFVisibility(const GlobalValue *GV); + + // AddPendingGlobalSymbol - Add a global to be processed and to + // the global symbol lookup, use a zero index because the table + // index will be determined later. + void AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup = false); + + // AddPendingExternalSymbol - Add the external to be processed + // and to the external symbol lookup, use a zero index because + // the symbol table index will be determined later. 
+    void AddPendingExternalSymbol(const char *External);
+
+    // AddToSymbolList - Update the symbol lookup and, if the symbol is
+    // private, add it to the PrivateSyms list, otherwise to SymbolList.
+    void AddToSymbolList(ELFSym *GblSym);
+
+    // As we complete the ELF file, we need to update fields in the ELF header
+    // (e.g. the location of the section table). These members keep track of
+    // the offset in ELFHeader of these various pieces to update and other
+    // locations in the file.
+    unsigned ELFHdr_e_shoff_Offset;     // e_shoff    in ELF header.
+    unsigned ELFHdr_e_shstrndx_Offset;  // e_shstrndx in ELF header.
+    unsigned ELFHdr_e_shnum_Offset;     // e_shnum    in ELF header.
+
+  private:
+    void EmitGlobal(const GlobalValue *GV);
+    void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+    void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                  ELFSection &GblS);
+    void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+    void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+                                  ELFSection &GblS, int64_t Offset = 0);
+    bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+    void EmitXXStructorList(Constant *List, ELFSection &Xtor);
+    void EmitRelocations();
+    void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
+    void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
+    void EmitSectionTableStringTable();
+    void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+    void EmitSymbolTable();
+    void EmitStringTable(const std::string &ModuleName);
+    void OutputSectionsAndSectionTable();
+    void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+                       unsigned Size);
+    unsigned SortSymbols();
+    CstExprResTy ResolveConstantExpr(const Constant *CV);
+  };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 0000000..0f6e882
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,212 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
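+//
+// (Overview, hedged: GCModuleInfo owns one GCStrategy per collector name
+// seen in the module and one GCFunctionInfo per GC function; each
+// GCFunctionInfo maps safe-point labels to the stack offsets of live GC
+// roots, which a GCMetadataPrinter can later emit.)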
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + + class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; + + public: + Printer() : FunctionPass(ID), OS(errs()) {} + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} + + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + }; + + class Deleter : public FunctionPass { + static char ID; + + public: + Deleter(); + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; + +} + +INITIALIZE_PASS(GCModuleInfo, "collector-metadata", + "Create Garbage Collector Module Metadata", false, false); + +// ----------------------------------------------------------------------------- + +GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) + : F(F), S(S), FrameSize(~0LL) {} + +GCFunctionInfo::~GCFunctionInfo() {} + +// ----------------------------------------------------------------------------- + +char GCModuleInfo::ID = 0; + +GCModuleInfo::GCModuleInfo() + : ImmutablePass(ID) {} + +GCModuleInfo::~GCModuleInfo() { + clear(); +} + +GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, + const std::string &Name) { + strategy_map_type::iterator NMI = StrategyMap.find(Name); + if (NMI != StrategyMap.end()) + return NMI->getValue(); + + for (GCRegistry::iterator I = GCRegistry::begin(), + E = GCRegistry::end(); I != E; ++I) { + if (Name == I->getName()) { + GCStrategy *S = I->instantiate(); + S->M = M; + S->Name = Name; + StrategyMap.GetOrCreateValue(Name).setValue(S); + StrategyList.push_back(S); + return S; + } + } + + dbgs() << "unsupported GC: " << Name << "\n"; + llvm_unreachable(0); +} + +GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { + assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); + assert(F.hasGC()); + + finfo_map_type::iterator I = FInfoMap.find(&F); + if (I != FInfoMap.end()) + return *I->second; + + GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); + GCFunctionInfo *GFI = S->insertFunctionInfo(F); + FInfoMap[&F] = GFI; + return *GFI; +} + +void GCModuleInfo::clear() { + FInfoMap.clear(); + StrategyMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; + StrategyList.clear(); +} + +// ----------------------------------------------------------------------------- + +char Printer::ID = 0; + +FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { + return new Printer(OS); +} + + +const char *Printer::getPassName() const { + return "Print Garbage Collector Information"; +} + +void Printer::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<GCModuleInfo>(); +} + +static const char *DescKind(GC::PointKind Kind) { + switch (Kind) { + default: llvm_unreachable("Unknown GC point kind"); + case GC::Loop: return "loop"; + case GC::Return: return "return"; + case GC::PreCall: return "pre-call"; + case GC::PostCall: return "post-call"; + } +} + +bool 
Printer::runOnFunction(Function &F) {
+  // Only functions that use GC have metadata to print.
+  if (!F.hasGC()) return false;
+
+  GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+  OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+                                      RE = FD->roots_end(); RI != RE; ++RI)
+    OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+  OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::iterator PI = FD->begin(),
+                                PE = FD->end(); PI != PE; ++PI) {
+
+    OS << "\t" << PI->Label->getName() << ": "
+       << DescKind(PI->Kind) << ", live = {";
+
+    // Print the live roots as a comma-separated list (assumes each safe
+    // point has at least one live root).
+    for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                       RE = FD->live_end(PI);;) {
+      OS << " " << RI->Num;
+      if (++RI == RE)
+        break;
+      OS << ",";
+    }
+
+    OS << " }\n";
+  }
+
+  return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+  return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(ID) {}
+
+const char *Deleter::getPassName() const {
+  return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+  return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+  GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(GMI && "Deleter didn't require GCModuleInfo?!");
+  GMI->clear();
+  return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..f80e9ce
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+  // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+  // Default is no action.
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..719fa19
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,398 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// directed by the GCStrategy. It also performs automatic root initialization + /// and custom intrinsic lowering. + class LowerIntrinsics : public FunctionPass { + static bool NeedsDefaultLoweringPass(const GCStrategy &C); + static bool NeedsCustomLoweringPass(const GCStrategy &C); + static bool CouldBecomeSafePoint(Instruction *I); + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + static bool InsertRootInitializers(Function &F, + AllocaInst **Roots, unsigned Count); + + public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + }; + + + /// MachineCodeAnalysis - This is a target-independent pass over the machine + /// function representation to identify safe points for the garbage collector + /// in the machine code. It inserts labels at safe points and populates a + /// GCMetadata record for each function. 
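+  /// (Hedged note: as the members below suggest, safe points are discovered
+  /// by walking call instructions -- FindSafePoints/VisitCallPoint -- and
+  /// each point is associated with an MCSymbol label whose final address the
+  /// GC metadata emitter can resolve; FindStackOffsets then records where
+  /// each root lives in the frame.)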
+ class MachineCodeAnalysis : public MachineFunctionPass { + const TargetMachine *TM; + GCFunctionInfo *FI; + MachineModuleInfo *MMI; + const TargetInstrInfo *TII; + + void FindSafePoints(MachineFunction &MF); + void VisitCallPoint(MachineBasicBlock::iterator MI); + MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const; + + void FindStackOffsets(MachineFunction &MF); + + public: + static char ID; + + MachineCodeAnalysis(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &MF); + }; + +} + +// ----------------------------------------------------------------------------- + +GCStrategy::GCStrategy() : + NeededSafePoints(0), + CustomReadBarriers(false), + CustomWriteBarriers(false), + CustomRoots(false), + InitRoots(true), + UsesMetadata(false) +{} + +GCStrategy::~GCStrategy() { + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; + + Functions.clear(); +} + +bool GCStrategy::initializeCustomLowering(Module &M) { return false; } + +bool GCStrategy::performCustomLowering(Function &F) { + dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable(0); + return 0; +} + +GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { + GCFunctionInfo *FI = new GCFunctionInfo(F, *this); + Functions.push_back(FI); + return FI; +} + +// ----------------------------------------------------------------------------- + +FunctionPass *llvm::createGCLoweringPass() { + return new LowerIntrinsics(); +} + +char LowerIntrinsics::ID = 0; + +LowerIntrinsics::LowerIntrinsics() + : FunctionPass(ID) {} + +const char *LowerIntrinsics::getPassName() const { + return "Lower Garbage Collection Instructions"; +} + +void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<GCModuleInfo>(); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. +bool LowerIntrinsics::doInitialization(Module &M) { + // FIXME: This is rather antisocial in the context of a JIT since it performs + // work against the entire module. But this cannot be done at + // runFunction time (initializeCustomLowering likely needs to change + // the module). + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && I->hasGC()) + MI->getFunctionInfo(*I); // Instantiate the GC strategy. + + bool MadeChange = false; + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (NeedsCustomLoweringPass(**I)) + if ((*I)->initializeCustomLowering(M)) + MadeChange = true; + + return MadeChange; +} + +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, + unsigned Count) { + // Scroll past alloca instructions. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + while (isa<AllocaInst>(IP)) ++IP; + + // Search for initializers in the initial BB. + SmallPtrSet<AllocaInst*,16> InitedRoots; + for (; !CouldBecomeSafePoint(IP); ++IP) + if (StoreInst *SI = dyn_cast<StoreInst>(IP)) + if (AllocaInst *AI = + dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. 
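+  // Any root that is not stored to before the first potential safe point
+  // receives an explicit null store immediately after its alloca (below),
+  // so a collection can never scan an uninitialized stack slot.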
+  bool MadeChange = false;
+
+  for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+    if (!InitedRoots.count(*I)) {
+      StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+                        cast<PointerType>((*I)->getType())->getElementType())),
+                        *I);
+      SI->insertAfter(*I);
+      MadeChange = true;
+    }
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+  // Default lowering is necessary only if read or write barriers have a
+  // default action. The default for roots is no action.
+  return !C.customWriteBarrier()
+      || !C.customReadBarrier()
+      || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+  // Custom lowering is only necessary if enabled for some action.
+  return C.customWriteBarrier()
+      || C.customReadBarrier()
+      || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+  // The natural set of instructions which could introduce safe points is:
+  //
+  //   - call, invoke (AfterCall, BeforeCall)
+  //   - phis (Loops)
+  //   - invoke, ret, unwind (Exit)
+  //
+  // However, instructions as seemingly innocuous as arithmetic can become
+  // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+  // it is necessary to take a conservative approach.
+
+  if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+      isa<StoreInst>(I) || isa<LoadInst>(I))
+    return false;
+
+  // llvm.gcroot is safe because it doesn't do anything at runtime.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction())
+      if (unsigned IID = F->getIntrinsicID())
+        if (IID == Intrinsic::gcroot)
+          return false;
+
+  return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+  // Quick exit for functions that do not use GC.
+  if (!F.hasGC())
+    return false;
+
+  GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  GCStrategy &S = FI.getStrategy();
+
+  bool MadeChange = false;
+
+  if (NeedsDefaultLoweringPass(S))
+    MadeChange |= PerformDefaultLowering(F, S);
+
+  if (NeedsCustomLoweringPass(S))
+    MadeChange |= S.performCustomLowering(F);
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+  bool LowerWr = !S.customWriteBarrier();
+  bool LowerRd = !S.customReadBarrier();
+  bool InitRoots = S.initializeRoots();
+
+  SmallVector<AllocaInst*, 32> Roots;
+
+  bool MadeChange = false;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+        Function *F = CI->getCalledFunction();
+        switch (F->getIntrinsicID()) {
+        case Intrinsic::gcwrite:
+          if (LowerWr) {
+            // Replace a write barrier with a simple store.
+            Value *St = new StoreInst(CI->getArgOperand(0),
+                                      CI->getArgOperand(2), CI);
+            CI->replaceAllUsesWith(St);
+            CI->eraseFromParent();
+          }
+          break;
+        case Intrinsic::gcread:
+          if (LowerRd) {
+            // Replace a read barrier with a simple load.
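+            // For example (illustrative IR):
+            //   %v = call i8* @llvm.gcread(i8* %obj, i8** %fld)
+            // becomes the unguarded load
+            //   %v = load i8** %fld
+            // Only the address operand (ArgOperand 1) feeds the load; the
+            // object-pointer operand is dropped.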
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. + Roots.push_back(cast<AllocaInst>( + CI->getArgOperand(0)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +FunctionPass *llvm::createGCMachineCodeAnalysisPass() { + return new MachineCodeAnalysis(); +} + +char MachineCodeAnalysis::ID = 0; + +MachineCodeAnalysis::MachineCodeAnalysis() + : MachineFunctionPass(ID) {} + +const char *MachineCodeAnalysis::getPassName() const { + return "Analyze Machine Code For Garbage Collection"; +} + +void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) + FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI, + CI->getDebugLoc())); + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) + FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI, + CI->getDebugLoc())); +} + +void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), + BBE = MF.end(); BBI != BBE; ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), + ME = BBI->end(); MI != ME; ++MI) + if (MI->getDesc().isCall()) + VisitCallPoint(MI); +} + +void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetRegisterInfo *TRI = TM->getRegisterInfo(); + assert(TRI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), + RE = FI->roots_end(); RI != RE; ++RI) + RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num); +} + +bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + if (!FI->getStrategy().needsSafePoints()) + return false; + + TM = &MF.getTarget(); + MMI = &getAnalysis<MachineModuleInfo>(); + TII = TM->getInstrInfo(); + + // Find the size of the stack frame. + FI->setFrameSize(MF.getFrameInfo()->getStackSize()); + + // Find all safe points. + FindSafePoints(MF); + + // Find the stack offsets for all roots. 
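+  // Each root's Num field holds the frame index of its gcroot alloca;
+  // FindStackOffsets (above) fills in StackOffset via
+  // TRI->getFrameIndexOffset, which the metadata printer can then emit as
+  // entries such as "0  8[sp]" (illustrative values).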
+ FindStackOffsets(MF); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp new file mode 100644 index 0000000..0ea30d7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -0,0 +1,1447 @@ +//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level if-conversion pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ifcvt" +#include "BranchFolding.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +// Hidden options for help debugging. +static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); +static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); +static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); +static cl::opt<bool> DisableSimple("disable-ifcvt-simple", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", + cl::init(false), cl::Hidden); +static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", + cl::init(true), cl::Hidden); + +STATISTIC(NumSimple, "Number of simple if-conversions performed"); +STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); +STATISTIC(NumTriangle, "Number of triangle if-conversions performed"); +STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); +STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); +STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); +STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); +STATISTIC(NumDupBBs, "Number of duplicated blocks"); + +namespace { + class IfConverter : public MachineFunctionPass { + enum IfcvtKind { + ICNotClassfied, // BB data valid, but not classified. + ICSimpleFalse, // Same as ICSimple, but on the false path. + ICSimple, // BB is entry of an one split, no rejoin sub-CFG. + ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition. + ICTriangleRev, // Same as ICTriangle, but true path rev condition. 
+ ICTriangleFalse, // Same as ICTriangle, but on the false path. + ICTriangle, // BB is entry of a triangle sub-CFG. + ICDiamond // BB is entry of a diamond sub-CFG. + }; + + /// BBInfo - One per MachineBasicBlock, this is used to cache the result + /// if-conversion feasibility analysis. This includes results from + /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its + /// classification, and common tail block of its successors (if it's a + /// diamond shape), its size, whether it's predicable, and whether any + /// instruction can clobber the 'would-be' predicate. + /// + /// IsDone - True if BB is not to be considered for ifcvt. + /// IsBeingAnalyzed - True if BB is currently being analyzed. + /// IsAnalyzed - True if BB has been analyzed (info is still valid). + /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. + /// IsBrAnalyzable - True if AnalyzeBranch() returns false. + /// HasFallThrough - True if BB may fallthrough to the following BB. + /// IsUnpredicable - True if BB is known to be unpredicable. + /// ClobbersPred - True if BB could modify predicates (e.g. has + /// cmp, call, etc.) + /// NonPredSize - Number of non-predicated instructions. + /// BB - Corresponding MachineBasicBlock. + /// TrueBB / FalseBB- See AnalyzeBranch(). + /// BrCond - Conditions for end of block conditional branches. + /// Predicate - Predicate used in the BB. + struct BBInfo { + bool IsDone : 1; + bool IsBeingAnalyzed : 1; + bool IsAnalyzed : 1; + bool IsEnqueued : 1; + bool IsBrAnalyzable : 1; + bool HasFallThrough : 1; + bool IsUnpredicable : 1; + bool CannotBeCopied : 1; + bool ClobbersPred : 1; + unsigned NonPredSize; + MachineBasicBlock *BB; + MachineBasicBlock *TrueBB; + MachineBasicBlock *FalseBB; + SmallVector<MachineOperand, 4> BrCond; + SmallVector<MachineOperand, 4> Predicate; + BBInfo() : IsDone(false), IsBeingAnalyzed(false), + IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), + HasFallThrough(false), IsUnpredicable(false), + CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), + BB(0), TrueBB(0), FalseBB(0) {} + }; + + /// IfcvtToken - Record information about pending if-conversions to attempt: + /// BBI - Corresponding BBInfo. + /// Kind - Type of block. See IfcvtKind. + /// NeedSubsumption - True if the to-be-predicated BB has already been + /// predicated. + /// NumDups - Number of instructions that would be duplicated due + /// to this if-conversion. (For diamonds, the number of + /// identical instructions at the beginnings of both + /// paths). + /// NumDups2 - For diamonds, the number of identical instructions + /// at the ends of both paths. + struct IfcvtToken { + BBInfo &BBI; + IfcvtKind Kind; + bool NeedSubsumption; + unsigned NumDups; + unsigned NumDups2; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) + : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + }; + + /// Roots - Basic blocks that do not have successors. These are the starting + /// points of Graph traversal. + std::vector<MachineBasicBlock*> Roots; + + /// BBAnalysis - Results of if-conversion feasibility analysis indexed by + /// basic block number. 
+ std::vector<BBInfo> BBAnalysis; + + const TargetLowering *TLI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool MadeChange; + int FnNum; + public: + static char ID; + IfConverter() : MachineFunctionPass(ID), FnNum(-1) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "If Converter"; } + + private: + bool ReverseBranchCondition(BBInfo &BBI); + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; + void ScanInstructions(BBInfo &BBI); + BBInfo &AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens); + bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, + bool isTriangle = false, bool RevBranch = false); + void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens); + void InvalidatePreds(MachineBasicBlock *BB); + void RemoveExtraEdges(BBInfo &BBI); + bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); + void PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs); + void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs, + bool IgnoreBr = false); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); + + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const { + return Size > 0 && TII->isProfitableToIfCvt(BB, Size); + } + + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, + MachineBasicBlock &FBB, unsigned FSize) const { + return TSize > 0 && FSize > 0 && + TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize); + } + + // blockAlwaysFallThrough - Block ends without a terminator. + bool blockAlwaysFallThrough(BBInfo &BBI) const { + return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + } + + // IfcvtTokenCmp - Used to sort if-conversion candidates. + static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) + return true; + else if (Incr1 == Incr2) { + // Favors subsumption. + if (C1->NeedSubsumption == false && C2->NeedSubsumption == true) + return true; + else if (C1->NeedSubsumption == C2->NeedSubsumption) { + // Favors diamond over triangle, etc. + if ((unsigned)C1->Kind < (unsigned)C2->Kind) + return true; + else if (C1->Kind == C2->Kind) + return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber(); + } + } + return false; + } + }; + + char IfConverter::ID = 0; +} + +INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false); + +FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } + +bool IfConverter::runOnMachineFunction(MachineFunction &MF) { + TLI = MF.getTarget().getTargetLowering(); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + if (!TII) return false; + + // Tail merge tend to expose more if-conversion opportunities. 
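+  // For example, merging the common tail shared by the two sides of a
+  // diamond shrinks both blocks, which can bring them under the duplication
+  // and size limits checked during analysis below.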
+ BranchFolder BF(true); + bool BFChange = BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + + DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"); + + if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { + DEBUG(dbgs() << " skipped\n"); + return false; + } + DEBUG(dbgs() << "\n"); + + MF.RenumberBlocks(); + BBAnalysis.resize(MF.getNumBlockIDs()); + + // Look for root nodes, i.e. blocks without successors. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + if (I->succ_empty()) + Roots.push_back(I); + + std::vector<IfcvtToken*> Tokens; + MadeChange = false; + unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + + NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { + // Do an initial analysis for each basic block and find all the potential + // candidates to perform if-conversion. + bool Change = false; + AnalyzeBlocks(MF, Tokens); + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + BBInfo &BBI = Token->BBI; + IfcvtKind Kind = Token->Kind; + unsigned NumDups = Token->NumDups; + unsigned NumDups2 = Token->NumDups2; + + delete Token; + + // If the block has been evicted out of the queue or it has already been + // marked dead (due to it being predicated), then skip it. + if (BBI.IsDone) + BBI.IsEnqueued = false; + if (!BBI.IsEnqueued) + continue; + + BBI.IsEnqueued = false; + + bool RetVal = false; + switch (Kind) { + default: assert(false && "Unexpected!"); + break; + case ICSimple: + case ICSimpleFalse: { + bool isFalse = Kind == ICSimpleFalse; + if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; + DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? + " false" : "") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "); + RetVal = IfConvertSimple(BBI, Kind); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) { + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; + } + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DEBUG(dbgs() << "Ifcvt (Triangle"); + if (isFalse) + DEBUG(dbgs() << " false"); + if (isRev) + DEBUG(dbgs() << " rev"); + DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertTriangle(BBI, Kind); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) { + if (isFalse) { + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; + } else { + if (isRev) ++NumTriangleRev; + else ++NumTriangle; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); + if (RetVal) ++NumDiamonds; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + Roots.clear(); + BBAnalysis.clear(); + + if (MadeChange && IfCvtBranchFold) { + BranchFolder BF(false); + BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + } + + MadeChange |= BFChange; + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. +static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return NULL; +} + +/// ReverseBranchCondition - Reverse the condition of the end of the block +/// branch. Swap block's 'true' and 'false' successors. +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere + if (!TII->ReverseBranchCondition(BBI.BrCond)) { + TII->RemoveBranch(*BBI.BB); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); + std::swap(BBI.TrueBB, BBI.FalseBB); + return true; + } + return false; +} + +/// getNextBlock - Returns the next block in the function blocks ordering. If +/// it is the end, returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { + MachineFunction::iterator I = BB; + MachineFunction::iterator E = BB->getParent()->end(); + if (++I == E) + return NULL; + return I; +} + +/// ValidSimple - Returns true if the 'true' block (along with its +/// predecessor) forms a valid simple shape for ifcvt. It also returns the +/// number of instructions that the ifcvt would need to duplicate if performed +/// in Dups. +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.IsBrAnalyzable) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied || + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) + return false; + Dups = TrueBBI.NonPredSize; + } + + return true; +} + +/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid triangle shape for ifcvt. +/// If 'FalseBranch' is true, it checks if 'true' block's false branch +/// branches to the 'false' block rather than the other way around. It also +/// returns the number of instructions that the ifcvt would need to duplicate +/// if performed in 'Dups'. +bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied) + return false; + + unsigned Size = TrueBBI.NonPredSize; + if (TrueBBI.IsBrAnalyzable) { + if (TrueBBI.TrueBB && TrueBBI.BrCond.empty()) + // Ends with an unconditional branch. It will be removed. + --Size; + else { + MachineBasicBlock *FExit = FalseBranch + ? 
TrueBBI.TrueBB : TrueBBI.FalseBB; + if (FExit) + // Require a conditional branch + ++Size; + } + } + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) + return false; + Dups = Size; + } + + MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; + if (!TExit && blockAlwaysFallThrough(TrueBBI)) { + MachineFunction::iterator I = TrueBBI.BB; + if (++I == TrueBBI.BB->getParent()->end()) + return false; + TExit = I; + } + return TExit && TExit == FalseBBI.BB; +} + +static +MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + MachineBasicBlock::iterator I = BB->end(); + while (I != BB->begin()) { + --I; + if (!I->getDesc().isBranch()) + break; + } + return I; +} + +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. +bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(FalseBBI.BB); + if (TT != FT) + return false; + if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + // Skip dbg_value instructions + while (TI != TIE && TI->isDebugValue()) + ++TI; + while (FI != FIE && FI->isDebugValue()) + ++FI; + while (TI != TIE && FI != FIE) { + // Skip dbg_value instructions. These do not count. + if (TI->isDebugValue()) { + while (TI != TIE && TI->isDebugValue()) + ++TI; + if (TI == TIE) + break; + } + if (FI->isDebugValue()) { + while (FI != FIE && FI->isDebugValue()) + ++FI; + if (FI == FIE) + break; + } + if (!TI->isIdenticalTo(FI)) + break; + ++Dups1; + ++TI; + ++FI; + } + + TI = firstNonBranchInst(TrueBBI.BB, TII); + FI = firstNonBranchInst(FalseBBI.BB, TII); + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + // Skip dbg_value instructions at end of the bb's. + while (TI != TIB && TI->isDebugValue()) + --TI; + while (FI != FIB && FI->isDebugValue()) + --FI; + while (TI != TIB && FI != FIB) { + // Skip dbg_value instructions. These do not count. + if (TI->isDebugValue()) { + while (TI != TIB && TI->isDebugValue()) + --TI; + if (TI == TIB) + break; + } + if (FI->isDebugValue()) { + while (FI != FIB && FI->isDebugValue()) + --FI; + if (FI == FIB) + break; + } + if (!TI->isIdenticalTo(FI)) + break; + ++Dups2; + --TI; + --FI; + } + + return true; +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. 
condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI) { + if (BBI.IsDone) + return; + + bool AlreadyPredicated = BBI.Predicate.size() > 0; + // First analyze the end of BB branches. + BBI.TrueBB = BBI.FalseBB = NULL; + BBI.BrCond.clear(); + BBI.IsBrAnalyzable = + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + + if (BBI.BrCond.size()) { + // No false branch. This BB must end with a conditional branch and a + // fallthrough. + if (!BBI.FalseBB) + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + if (!BBI.FalseBB) { + // Malformed bcc? True and false blocks are the same? + BBI.IsUnpredicable = true; + return; + } + } + + // Then scan all the instructions. + BBI.NonPredSize = 0; + BBI.ClobbersPred = false; + for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isNotDuplicable()) + BBI.CannotBeCopied = true; + + bool isPredicated = TII->isPredicated(I); + bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); + + if (!isCondBr) { + if (!isPredicated) + BBI.NonPredSize++; + else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; + } + } + + if (BBI.ClobbersPred && !isPredicated) { + // Predicate modification instruction should end the block (except for + // already predicated instructions and end of block branches). + if (isCondBr) { + // A conditional branch is not predicable, but it may be eliminated. + continue; + } + + // Predicate may have been modified, the subsequent (currently) + // unpredicated instructions cannot be correctly predicated. + BBI.IsUnpredicable = true; + return; + } + + // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are + // still potentially predicable. + std::vector<MachineOperand> PredDefs; + if (TII->DefinesPredicate(I, PredDefs)) + BBI.ClobbersPred = true; + + if (!TII->isPredicable(I)) { + BBI.IsUnpredicable = true; + return; + } + } +} + +/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be +/// predicated by the specified predicate. +bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, + SmallVectorImpl<MachineOperand> &Pred, + bool isTriangle, bool RevBranch) { + // If the block is dead or unpredicable, then it cannot be predicated. + if (BBI.IsDone || BBI.IsUnpredicable) + return false; + + // If it is already predicated, check if its predicate subsumes the new + // predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + return false; + + if (BBI.BrCond.size()) { + if (!isTriangle) + return false; + + // Test predicate subsumption. + SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (RevBranch) { + if (TII->ReverseBranchCondition(Cond)) + return false; + } + if (TII->ReverseBranchCondition(RevPred) || + !TII->SubsumesPredicate(Cond, RevPred)) + return false; + } + + return true; +} + +/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from +/// the specified block. Record its successors and whether it looks like an +/// if-conversion candidate. 
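+/// Results are memoized in BBAnalysis: an already-analyzed block is returned
+/// directly, and IsBeingAnalyzed guards against infinite recursion when the
+/// sub-CFG contains cycles.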
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens) { + BBInfo &BBI = BBAnalysis[BB->getNumber()]; + + if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) + return BBI; + + BBI.BB = BB; + BBI.IsBeingAnalyzed = true; + + ScanInstructions(BBI); + + // Unanalyzable or ends with fallthrough or unconditional branch. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + // Do not ifcvt if either path is a back edge to the entry block. + if (BBI.TrueBB == BB || BBI.FalseBB == BB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + // Do not ifcvt if true and false fallthrough blocks are the same. + if (!BBI.FalseBB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens); + BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens); + + if (TrueBBI.IsDone && FalseBBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + + unsigned Dups = 0; + unsigned Dups2 = 0; + bool TNeedSub = TrueBBI.Predicate.size() > 0; + bool FNeedSub = FalseBBI.Predicate.size() > 0; + bool Enqueued = false; + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), + *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, + Dups2)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { + // Triangle: + // EBB + // | \_ + // | | + // | TBB + // | / + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(TrueBBI, Dups) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { + // Simple (split, no rejoin): + // EBB + // | \_ + // | | + // | TBB---> exit + // | + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); + Enqueued = true; + } + + if (CanRevCond) { + // Try the other path... 
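+    // i.e. repeat the triangle and simple tests with the roles of the true
+    // and false blocks swapped, predicating on the reversed condition
+    // RevCond computed above.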
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + FeasibilityAnalysis(FalseBBI, RevCond, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(FalseBBI, Dups) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); + Enqueued = true; + } + } + + BBI.IsEnqueued = Enqueued; + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; +} + +/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion +/// candidates. +void IfConverter::AnalyzeBlocks(MachineFunction &MF, + std::vector<IfcvtToken*> &Tokens) { + std::set<MachineBasicBlock*> Visited; + for (unsigned i = 0, e = Roots.size(); i != e; ++i) { + for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited), + E = idf_ext_end(Roots[i], Visited); I != E; ++I) { + MachineBasicBlock *BB = *I; + AnalyzeBlock(BB, Tokens); + } + } + + // Sort to favor more complex ifcvt scheme. + std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); +} + +/// canFallThroughTo - Returns true either if ToBB is the next block after BB or +/// that all the intervening blocks are empty (given BB can fall through to its +/// next block). +static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { + MachineFunction::iterator PI = BB; + MachineFunction::iterator I = llvm::next(PI); + MachineFunction::iterator TI = ToBB; + MachineFunction::iterator E = BB->getParent()->end(); + while (I != TI) { + // Check isSuccessor to avoid case where the next block is empty, but + // it's not a successor. + if (I == E || !I->empty() || !PI->isSuccessor(I)) + return false; + PI = I++; + } + return true; +} + +/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed +/// to determine if it can be if-converted. If predecessor is already enqueued, +/// dequeue it! +void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()]; + if (PBBI.IsDone || PBBI.BB == BB) + continue; + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } +} + +/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB. +/// +static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, + const TargetInstrInfo *TII) { + DebugLoc dl; // FIXME: this is nowhere + SmallVector<MachineOperand, 0> NoCond; + TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl); +} + +/// RemoveExtraEdges - Remove true / false edges if either / both are no longer +/// successors. +void IfConverter::RemoveExtraEdges(BBInfo &BBI) { + MachineBasicBlock *TBB = NULL, *FBB = NULL; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond)) + BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); +} + +/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are +/// modeled as read + write (sort like two-address instructions). 
These +/// routines track register liveness and add implicit uses to if-converted +/// instructions to conform to the model. +static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Redefs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Redefs.insert(*Subreg); + } +} + +static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, + const TargetRegisterInfo *TRI, + bool AddImpUse = false) { + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Defs.push_back(Reg); + else if (MO.isKill()) { + Redefs.erase(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.erase(*SR); + } + } + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (Redefs.count(Reg)) { + if (AddImpUse) + // Treat predicated update as read + write. + MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/,false/*IsKill*/)); + } else { + Redefs.insert(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.insert(*SR); + } + } +} + +static void UpdatePredRedefs(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallSet<unsigned,4> &Redefs, + const TargetRegisterInfo *TRI) { + while (I != E) { + UpdatePredRedefs(I, Redefs, TRI); + ++I; + } +} + +/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. +/// +bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICSimpleFalse) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. + BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (Kind == ICSimpleFalse) + if (TII->ReverseBranchCondition(Cond)) + assert(false && "Unable to reverse branch condition!"); + + // Initialize liveins to the first BB. These are potentiall redefined by + // predicated instructions. + SmallSet<unsigned, 4> Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + + if (CvtBBI->BB->pred_size() > 1) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + } else { + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + + // Merge converted block into entry block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + + bool IterIfcvt = true; + if (!canFallThroughTo(BBI.BB, NextBBI->BB)) { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + // Now ifcvt'd block will look like this: + // BB: + // ... 
+ // t, f = cmp + // if t op + // b BBf + // + // We cannot further ifcvt this block because the unconditional branch + // will have to be predicated on the new condition, that will not be + // available if cmp executes. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertTriangle - If convert a triangle sub-CFG. +/// +bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + DebugLoc dl; // FIXME: this is nowhere + + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. + BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + if (TII->ReverseBranchCondition(Cond)) + assert(false && "Unable to reverse branch condition!"); + + if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { + if (ReverseBranchCondition(*CvtBBI)) { + // BB has been changed, modify its predecessors (except for this + // one) so they don't get ifcvt'ed based on bad intel. + for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), + E = CvtBBI->BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PBB = *PI; + if (PBB == BBI.BB) + continue; + BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; + if (PBBI.IsEnqueued) { + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } + } + } + } + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet<unsigned, 4> Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + + bool HasEarlyExit = CvtBBI->FalseBB != NULL; + if (CvtBBI->BB->pred_size() > 1) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + } else { + // Predicate the 'true' block after removing its branch. + CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + + // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI, false); + } + + // If 'true' block has a 'false' successor, add an exit branch to it. + if (HasEarlyExit) { + SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), + CvtBBI->BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + assert(false && "Unable to reverse branch condition!"); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); + BBI.BB->addSuccessor(CvtBBI->FalseBB); + } + + // Merge in the 'false' block if the 'false' block has no other + // predecessors. Otherwise, add an unconditional branch to 'false'. 
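+  // Merging requires that no early exit was added above, that this block is
+  // the sole predecessor of the 'false' block, and that the 'false' block
+  // does not itself rely on a fallthrough, as checked below.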
+ bool FalseBBDead = false; + bool IterIfcvt = true; + bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + if (!isFallThrough) { + // Only merge them if the true block does not fallthrough to the false + // block. By not merging them, we make it possible to iteratively + // ifcvt the blocks. + if (!HasEarlyExit && + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + MergeBlocks(BBI, *NextBBI); + FalseBBDead = true; + } else { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + } + // Mixed predicated and unpredicated code. This cannot be iteratively + // predicated. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + if (FalseBBDead) + NextBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + // True block must fall through or end with an unanalyzable terminator. + if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (TrueBBI.IsDone || FalseBBI.IsDone || + TrueBBI.BB->pred_size() > 1 || + FalseBBI.BB->pred_size() > 1) { + // Something has changed. It's no longer safe to predicate these blocks. + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; + } + + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. + BBInfo *BBI1 = &TrueBBI; + BBInfo *BBI2 = &FalseBBI; + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + assert(false && "Unable to reverse branch condition!"); + SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; + SmallVector<MachineOperand, 4> *Cond2 = &RevCond; + + // Figure out the more profitable ordering. + bool DoSwap = false; + if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + DoSwap = true; + else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) + DoSwap = true; + } + if (DoSwap) { + std::swap(BBI1, BBI2); + std::swap(Cond1, Cond2); + } + + // Remove the conditional branch from entry to the blocks. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet<unsigned, 4> Redefs; + InitPredRedefs(BBI1->BB, Redefs, TRI); + + // Remove the duplicated instructions at the beginnings of both paths. 
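+  // ValidDiamond counted NumDups1 such instructions; dbg_value instructions
+  // are stepped over separately on each side because the two paths may carry
+  // different debug info.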
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); + MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); + MachineBasicBlock::iterator DIE1 = BBI1->BB->end(); + MachineBasicBlock::iterator DIE2 = BBI2->BB->end(); + // Skip dbg_value instructions + while (DI1 != DIE1 && DI1->isDebugValue()) + ++DI1; + while (DI2 != DIE2 && DI2->isDebugValue()) + ++DI2; + BBI1->NonPredSize -= NumDups1; + BBI2->NonPredSize -= NumDups1; + + // Skip past the dups on each side separately since there may be + // differing dbg_value entries. + for (unsigned i = 0; i < NumDups1; ++DI1) { + if (!DI1->isDebugValue()) + ++i; + } + while (NumDups1 != 0) { + ++DI2; + if (!DI2->isDebugValue()) + --NumDups1; + } + + UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); + BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); + BBI2->BB->erase(BBI2->BB->begin(), DI2); + + // Predicate the 'true' block after removing its branch. + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + DI1 = BBI1->BB->end(); + for (unsigned i = 0; i != NumDups2; ) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI1 != BBI1->BB->begin()); + --DI1; + // skip dbg_value instructions + if (!DI1->isDebugValue()) + ++i; + } + BBI1->BB->erase(DI1, BBI1->BB->end()); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); + + // Predicate the 'false' block. + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + DI2 = BBI2->BB->end(); + while (NumDups2 != 0) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI2 != BBI2->BB->begin()); + --DI2; + // skip dbg_value instructions + if (!DI2->isDebugValue()) + --NumDups2; + } + PredicateBlock(*BBI2, DI2, *Cond2, Redefs); + + // Merge the true block into the entry of the diamond. + MergeBlocks(BBI, *BBI1, TailBB == 0); + MergeBlocks(BBI, *BBI2, TailBB == 0); + + // If the if-converted block falls through or unconditionally branches into + // the tail block, and the tail block does not have other predecessors, then + // fold the tail block in as well. Otherwise, unless it falls through to the + // tail, add a unconditional branch to it. + if (TailBB) { + BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; + bool CanMergeTail = !TailBBI.HasFallThrough; + // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; + // check if there are any other predecessors besides those. + unsigned NumPreds = TailBB->pred_size(); + if (NumPreds > 1) + CanMergeTail = false; + else if (NumPreds == 1 && CanMergeTail) { + MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); + if (*PI != BBI1->BB && *PI != BBI2->BB) + CanMergeTail = false; + } + if (CanMergeTail) { + MergeBlocks(BBI, TailBBI); + TailBBI.IsDone = true; + } else { + BBI.BB->addSuccessor(TailBB); + InsertUncondBranch(BBI.BB, TailBB, TII); + BBI.HasFallThrough = false; + } + } + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, + // which can happen here if TailBB is unanalyzable and is merged, so + // explicitly remove BBI1 and BBI2 as successors. + BBI.BB->removeSuccessor(BBI1->BB); + BBI.BB->removeSuccessor(BBI2->BB); + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +/// PredicateBlock - Predicate instructions from the start of the block to the +/// specified end with the specified condition. 
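+/// For example (ARM-style, for illustration): predicating "ADD r0, r0, r1"
+/// on an EQ condition yields "ADDEQ r0, r0, r1". The target-specific rewrite
+/// is performed by TII->PredicateInstruction.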
+void IfConverter::PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs) { + for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { + if (I->isDebugValue() || TII->isPredicated(I)) + continue; + if (!TII->PredicateInstruction(I, Cond)) { +#ifndef NDEBUG + dbgs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(I, Redefs, TRI, true); + } + + std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + + BBI.IsAnalyzed = false; + BBI.NonPredSize = 0; + + ++NumIfConvBBs; +} + +/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to +/// the destination block. Skip end of block branches if IgnoreBr is true. +void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs, + bool IgnoreBr) { + MachineFunction &MF = *ToBBI.BB->getParent(); + + for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), + E = FromBBI.BB->end(); I != E; ++I) { + const TargetInstrDesc &TID = I->getDesc(); + // Do not copy the end of the block branches. + if (IgnoreBr && TID.isBranch()) + break; + + MachineInstr *MI = MF.CloneMachineInstr(I); + ToBBI.BB->insert(ToBBI.BB->end(), MI); + ToBBI.NonPredSize++; + + if (!TII->isPredicated(I) && !MI->isDebugValue()) { + if (!TII->PredicateInstruction(MI, Cond)) { +#ifndef NDEBUG + dbgs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); + } + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs, TRI, true); + } + + if (!IgnoreBr) { + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } + } + + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), + std::back_inserter(ToBBI.Predicate)); + std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.IsAnalyzed = false; + + ++NumDupBBs; +} + +/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. +/// This will leave FromBB as an empty block, so remove all of its +/// successor edges except for the fall-through edge. If AddEdges is true, +/// i.e., when FromBBI's branch is being moved, add those successor edges to +/// ToBBI. +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { + ToBBI.BB->splice(ToBBI.BB->end(), + FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. 
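+    // (ToBB generally occupies a different layout position, so an implicit
+    // fallthrough would no longer reach Succ; FromBBI instead keeps falling
+    // through to its layout successor, re-established below.)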
+ if (Succ == FallThrough) + continue; + FromBBI.BB->removeSuccessor(Succ); + if (AddEdges) + ToBBI.BB->addSuccessor(Succ); + } + + // Now FromBBI always falls through to the next block! + if (NBB && !FromBBI.BB->isSuccessor(NBB)) + FromBBI.BB->addSuccessor(NBB); + + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), + std::back_inserter(ToBBI.Predicate)); + FromBBI.Predicate.clear(); + + ToBBI.NonPredSize += FromBBI.NonPredSize; + FromBBI.NonPredSize = 0; + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.HasFallThrough = FromBBI.HasFallThrough; + ToBBI.IsAnalyzed = false; + FromBBI.IsAnalyzed = false; +} diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp new file mode 100644 index 0000000..b965bfd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -0,0 +1,480 @@ +//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The inline spiller modifies the machine function directly instead of +// inserting spills and restores in VirtRegMap. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" +#include "Spiller.h" +#include "SplitKit.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class InlineSpiller : public Spiller { + MachineFunctionPass &pass_; + MachineFunction &mf_; + LiveIntervals &lis_; + MachineLoopInfo &loops_; + VirtRegMap &vrm_; + MachineFrameInfo &mfi_; + MachineRegisterInfo &mri_; + const TargetInstrInfo &tii_; + const TargetRegisterInfo &tri_; + const BitVector reserved_; + + SplitAnalysis splitAnalysis_; + + // Variables that are valid during spill(), but used by multiple methods. + LiveInterval *li_; + SmallVectorImpl<LiveInterval*> *newIntervals_; + const TargetRegisterClass *rc_; + int stackSlot_; + const SmallVectorImpl<LiveInterval*> *spillIs_; + + // Values of the current interval that can potentially remat. + SmallPtrSet<VNInfo*, 8> reMattable_; + + // Values in reMattable_ that failed to remat at some point. 
+ SmallPtrSet<VNInfo*, 8> usedValues_; + + ~InlineSpiller() {} + +public: + InlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) + : pass_(pass), + mf_(mf), + lis_(pass.getAnalysis<LiveIntervals>()), + loops_(pass.getAnalysis<MachineLoopInfo>()), + vrm_(vrm), + mfi_(*mf.getFrameInfo()), + mri_(mf.getRegInfo()), + tii_(*mf.getTarget().getInstrInfo()), + tri_(*mf.getTarget().getRegisterInfo()), + reserved_(tri_.getReservedRegs(mf_)), + splitAnalysis_(mf, lis_, loops_) {} + + void spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs); + +private: + bool split(); + + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, + SlotIndex UseIdx); + bool reMaterializeFor(MachineBasicBlock::iterator MI); + void reMaterializeAll(); + + bool coalesceStackAccess(MachineInstr *MI); + bool foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops); + void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); +}; +} + +namespace llvm { +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + return new InlineSpiller(pass, mf, vrm); +} +} + +/// split - try splitting the current interval into pieces that may allocate +/// separately. Return true if successful. +bool InlineSpiller::split() { + splitAnalysis_.analyze(li_); + + if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) { + // We can split, but li_ may be left intact with fewer uses. + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitAroundLoop(loop)) + return true; + } + + // Try splitting into single block intervals. + SplitAnalysis::BlockPtrSet blocks; + if (splitAnalysis_.getMultiUseBlocks(blocks)) { + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitSingleBlocks(blocks)) + return true; + } + + // Try splitting inside a basic block. + if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) { + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitInsideBlock(MBB)) + return true; + } + + // We may have been able to split out some uses, but the original interval is + // intact, and it should still be spilled. + return false; +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getUseIndex(); + UseIdx = UseIdx.getUseIndex(); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg) + continue; + // Reserved registers are OK. + if (MO.isUndef() || !lis_.hasInterval(MO.getReg())) + continue; + // We don't want to move any defs. + if (MO.isDef()) + return false; + // We cannot depend on virtual registers in spillIs_. They will be spilled. + for (unsigned si = 0, se = spillIs_->size(); si != se; ++si) + if ((*spillIs_)[si]->reg == MO.getReg()) + return false; + + LiveInterval &LI = lis_.getInterval(MO.getReg()); + const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx); + if (!OVNI) + continue; + if (OVNI != LI.getVNInfoAt(UseIdx)) + return false; + } + return true; +} + +/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of +/// reloading it. 
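The idea behind the routine below, as a hedged sketch: when the value's
defining instruction is trivially rematerializable (cheap and side-effect
free, e.g. a constant materialization), re-emitting it immediately before the
use is preferable to reloading from the stack slot. Illustrative pseudo
machine code, with made-up opcodes and virtual registers:

    //   %vreg1 = MOVi 42        ; original, trivially remattable def
    //   ...                     ; %vreg1 was chosen for spilling
    //   %vreg9 = MOVi 42        ; remat: recompute the value at the use...
    //   use %vreg9              ; ...instead of loading it from the slot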
+bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
+  SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
+  VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+  if (!OrigVNI) {
+    DEBUG(dbgs() << "\tadding <undef> flags: ");
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+        MO.setIsUndef();
+    }
+    DEBUG(dbgs() << UseIdx << '\t' << *MI);
+    return true;
+  }
+  if (!reMattable_.count(OrigVNI)) {
+    DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
+                 << UseIdx << '\t' << *MI);
+    return false;
+  }
+  MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
+  if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+    usedValues_.insert(OrigVNI);
+    DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+    return false;
+  }
+
+  // If the instruction also writes li_->reg, it had better not require the same
+  // register for uses and defs.
+  bool Reads, Writes;
+  SmallVector<unsigned, 8> Ops;
+  tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+  if (Writes) {
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(Ops[i]);
+      if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+        usedValues_.insert(OrigVNI);
+        DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+        return false;
+      }
+    }
+  }
+
+  // Allocate a new register for the remat.
+  unsigned NewVReg = mri_.createVirtualRegister(rc_);
+  vrm_.grow();
+  LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+  NewLI.markNotSpillable();
+  newIntervals_->push_back(&NewLI);
+
+  // Finally we can rematerialize OrigMI before MI.
+  MachineBasicBlock &MBB = *MI->getParent();
+  tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
+  MachineBasicBlock::iterator RematMI = MI;
+  SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
+  DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+
+  // Replace operands
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(Ops[i]);
+    if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
+      MO.setReg(NewVReg);
+      MO.setIsKill();
+    }
+  }
+  DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+  VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
+                                      lis_.getVNInfoAllocator());
+  NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+  DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+  return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+  // Do a quick scan of the interval values to find if any are remattable.
+  reMattable_.clear();
+  usedValues_.clear();
+  for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
+       E = li_->vni_end(); I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused() || !VNI->isDefAccurate())
+      continue;
+    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
+    if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
+      continue;
+    reMattable_.insert(VNI);
+  }
+
+  // Often, no defs are remattable.
+  if (reMattable_.empty())
+    return;
+
+  // Try to remat before all uses of li_->reg.
+ bool anyRemat = false; + for (MachineRegisterInfo::use_nodbg_iterator + RI = mri_.use_nodbg_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(MI); + + if (!anyRemat) + return; + + // Remove any values that were completely rematted. + bool anyRemoved = false; + for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(), + E = reMattable_.end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->hasPHIKill() || usedValues_.count(VNI)) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); + lis_.RemoveMachineInstrFromMaps(DefMI); + vrm_.RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + VNI->setIsDefAccurate(false); + anyRemoved = true; + } + + if (!anyRemoved) + return; + + // Removing values may cause debug uses where li_ is not live. + for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) { + if (!MI->isDebugValue()) + continue; + // Try to preserve the debug value if li_ is live immediately after it. + MachineBasicBlock::iterator NextMI = MI; + ++NextMI; + if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { + VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI)); + if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) + continue; + } + DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); + MI->eraseFromParent(); + } +} + +/// If MI is a load or store of stackSlot_, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + int FI = 0; + unsigned reg; + if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && + !(reg = tii_.isStoreToStackSlot(MI, FI))) + return false; + + // We have a stack access. Is it the right register and slot? + if (reg != li_->reg || FI != stackSlot_) + return false; + + DEBUG(dbgs() << "Coalescing stack access: " << *MI); + lis_.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + return true; +} + +/// foldMemoryOperand - Try folding stack slot references in Ops into MI. +/// Return true on success, and MI will be erased. +bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops) { + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector<unsigned, 8> FoldOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + unsigned Idx = Ops[i]; + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) + continue; + // FIXME: Teach targets to deal with subregs. + if (MO.getSubReg()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + if (!FoldMI) + return false; + lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + vrm_.addSpillSlotUse(stackSlot_, FoldMI); + MI->eraseFromParent(); + DEBUG(dbgs() << "\tfolded: " << *FoldMI); + return true; +} + +/// insertReload - Insert a reload of NewLI.reg before MI. +void InlineSpiller::insertReload(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + --MI; // Point to load instruction. 
+ SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); +} + +/// insertSpill - Insert a spill of NewLI.reg after MI. +void InlineSpiller::insertSpill(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + --MI; // Point to store instruction. + SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); +} + +void InlineSpiller::spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { + DEBUG(dbgs() << "Inline spilling " << *li << "\n"); + assert(li->isSpillable() && "Attempting to spill already spilled value."); + assert(!li->isStackSlot() && "Trying to spill a stack slot."); + + li_ = li; + newIntervals_ = &newIntervals; + rc_ = mri_.getRegClass(li->reg); + spillIs_ = &spillIs; + + if (split()) + return; + + reMaterializeAll(); + + // Remat may handle everything. + if (li_->empty()) + return; + + stackSlot_ = vrm_.getStackSlot(li->reg); + if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) + stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + + // Iterate over instructions using register. + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else { + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + MI->eraseFromParent(); + } + continue; + } + + // Stack slot accesses may coalesce away. + if (coalesceStackAccess(MI)) + continue; + + // Analyze instruction. + bool Reads, Writes; + SmallVector<unsigned, 8> Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops); + + // Attempt to fold memory ops. + if (foldMemoryOperand(MI, Ops)) + continue; + + // Allocate interval around instruction. + // FIXME: Infer regclass from instruction alone. + unsigned NewVReg = mri_.createVirtualRegister(rc_); + vrm_.grow(); + LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + NewLI.markNotSpillable(); + + if (Reads) + insertReload(NewLI, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + MO.setReg(NewVReg); + if (MO.isUse()) { + if (!MI->isRegTiedToDefOperand(Ops[i])) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + + // FIXME: Use a second vreg if instruction has no tied ops. 
+ if (Writes && hasLiveDef) + insertSpill(NewLI, MI); + + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + newIntervals.push_back(&NewLI); + } +} diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp new file mode 100644 index 0000000..3852eba --- /dev/null +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -0,0 +1,538 @@ +//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IntrinsicLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +template <class ArgIt> +static void EnsureFunctionExists(Module &M, const char *Name, + ArgIt ArgBegin, ArgIt ArgEnd, + const Type *RetTy) { + // Insert a correctly-typed definition now. + std::vector<const Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back(I->getType()); + M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); +} + +static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, + const char *FName, + const char *DName, const char *LDName) { + // Insert definitions for all the floating point types. + switch((int)Fn->arg_begin()->getType()->getTypeID()) { + case Type::FloatTyID: + EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), + Type::getFloatTy(M.getContext())); + break; + case Type::DoubleTyID: + EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), + Type::getDoubleTy(M.getContext())); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(), + Fn->arg_begin()->getType()); + break; + } +} + +/// ReplaceCallWith - This function is used when we want to lower an intrinsic +/// call to a call of an external function. This handles hard cases such as +/// when there was already a prototype for the external function, and if that +/// prototype doesn't match the arguments we expect to pass in. +template <class ArgIt> +static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, + ArgIt ArgBegin, ArgIt ArgEnd, + const Type *RetTy) { + // If we haven't already looked up this function, check to see if the + // program already contains a function with this name. + Module *M = CI->getParent()->getParent()->getParent(); + // Get or insert the definition now. 
+ std::vector<const Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back((*I)->getType()); + Constant* FCache = M->getOrInsertFunction(NewFn, + FunctionType::get(RetTy, ParamTys, false)); + + IRBuilder<> Builder(CI->getParent(), CI); + SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); + CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end()); + NewCI->setName(CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + return NewCI; +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + +void IntrinsicLowering::AddPrototypes(Module &M) { + LLVMContext &Context = M.getContext(); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->isDeclaration() && !I->use_empty()) + switch (I->getIntrinsicID()) { + default: break; + case Intrinsic::setjmp: + EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + Type::getInt32Ty(M.getContext())); + break; + case Intrinsic::longjmp: + EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + Type::getVoidTy(M.getContext())); + break; + case Intrinsic::siglongjmp: + EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + Type::getVoidTy(M.getContext())); + break; + case Intrinsic::memcpy: + M.getOrInsertFunction("memcpy", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); + break; + case Intrinsic::memmove: + M.getOrInsertFunction("memmove", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); + break; + case Intrinsic::memset: + M.getOrInsertFunction("memset", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), + TD.getIntPtrType(Context), (Type *)0); + break; + case Intrinsic::sqrt: + EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); + break; + case Intrinsic::sin: + EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl"); + break; + case Intrinsic::cos: + EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl"); + break; + case Intrinsic::pow: + EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl"); + break; + case Intrinsic::log: + EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl"); + break; + case Intrinsic::log2: + EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l"); + break; + case Intrinsic::log10: + EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l"); + break; + case Intrinsic::exp: + EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl"); + break; + case Intrinsic::exp2: + EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l"); + break; + } +} + +/// LowerBSWAP - Emit the code to lower bswap of V before the specified +/// instruction IP. 
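The i32 expansion emitted below corresponds to the classic shift-and-mask byte
swap. A self-contained C++ reference with the same semantics (the Tmp names
mirror the IR values created below; this sketch is for exposition and is not
part of the patch):

    #include <cstdint>

    // Equivalent to the i32 IR sequence below: byte 0 moves to byte 3,
    // byte 1 to byte 2, and vice versa.
    static inline uint32_t bswap32_ref(uint32_t V) {
      uint32_t Tmp4 = V << 24;                  // byte 0 -> byte 3
      uint32_t Tmp3 = (V << 8)  & 0x00FF0000u;  // byte 1 -> byte 2
      uint32_t Tmp2 = (V >> 8)  & 0x0000FF00u;  // byte 2 -> byte 1
      uint32_t Tmp1 = V >> 24;                  // byte 3 -> byte 0
      return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);
    }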
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + IRBuilder<> Builder(IP->getParent(), IP); + + switch(BitSize) { + default: llvm_unreachable("Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.1"); + V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16"); + break; + } + case 32: { + Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.4"); + Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.3"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), + "bswap.1"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), + "bswap.and2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); + V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32"); + break; + } + case 64: { + Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56), + "bswap.8"); + Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40), + "bswap.7"); + Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.6"); + Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.5"); + Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.4"); + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), + "bswap.3"); + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), + "bswap.2"); + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), + "bswap.1"); + Tmp7 = Builder.CreateAnd(Tmp7, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000000000ULL), + "bswap.and7"); + Tmp6 = Builder.CreateAnd(Tmp6, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000000000ULL), + "bswap.and6"); + Tmp5 = Builder.CreateAnd(Tmp5, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), + "bswap.and5"); + Tmp4 = Builder.CreateAnd(Tmp4, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), + "bswap.and4"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), + "bswap.and2"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); + Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6"); + V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64"); + break; + } + } + return V; +} + +/// LowerCTPOP - Emit the code to lower ctpop of V before the specified +/// instruction IP. 
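LowerCTPOP below emits the standard parallel (SWAR) population count: at step
k, adjacent 2^k-bit fields are summed in place. For a 32-bit value the emitted
IR computes the same result as this C++ reference, whose masks are the low
halves of MaskValues (an expository sketch, not part of the patch):

    #include <cstdint>

    static inline uint32_t ctpop32_ref(uint32_t V) {
      V = (V & 0x55555555u) + ((V >> 1)  & 0x55555555u); // sum 1-bit fields
      V = (V & 0x33333333u) + ((V >> 2)  & 0x33333333u); // sum 2-bit fields
      V = (V & 0x0F0F0F0Fu) + ((V >> 4)  & 0x0F0F0F0Fu); // sum 4-bit fields
      V = (V & 0x00FF00FFu) + ((V >> 8)  & 0x00FF00FFu); // sum 8-bit fields
      V = (V & 0x0000FFFFu) + ((V >> 16) & 0x0000FFFFu); // sum 16-bit fields
      return V;                                          // result in [0, 32]
    }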
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+  assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+  static const uint64_t MaskValues[6] = {
+    0x5555555555555555ULL, 0x3333333333333333ULL,
+    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+  };
+
+  IRBuilder<> Builder(IP->getParent(), IP);
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  unsigned WordSize = (BitSize + 63) / 64;
+  Value *Count = ConstantInt::get(V->getType(), 0);
+
+  for (unsigned n = 0; n < WordSize; ++n) {
+    Value *PartValue = V;
+    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+         i <<= 1, ++ct) {
+      Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+      Value *VShift = Builder.CreateLShr(PartValue,
+                                         ConstantInt::get(V->getType(), i),
+                                         "ctpop.sh");
+      Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+      PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+    }
+    Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+    if (BitSize > 64) {
+      V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+                             "ctpop.part.sh");
+      BitSize -= 64;
+    }
+  }
+
+  return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+  IRBuilder<> Builder(IP->getParent(), IP);
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  for (unsigned i = 1; i < BitSize; i <<= 1) {
+    Value *ShVal = ConstantInt::get(V->getType(), i);
+    ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+    V = Builder.CreateOr(V, ShVal, "ctlz.step");
+  }
+
+  V = Builder.CreateNot(V);
+  return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+                                       const char *Dname,
+                                       const char *LDname) {
+  CallSite CS(CI);
+  switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+  default: llvm_unreachable("Invalid type in intrinsic");
+  case Type::FloatTyID:
+    ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+                    Type::getFloatTy(CI->getContext()));
+    break;
+  case Type::DoubleTyID:
+    ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+                    Type::getDoubleTy(CI->getContext()));
+    break;
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+    ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+                    CI->getArgOperand(0)->getType());
+    break;
+  }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+  IRBuilder<> Builder(CI->getParent(), CI);
+  LLVMContext &Context = CI->getContext();
+
+  const Function *Callee = CI->getCalledFunction();
+  assert(Callee && "Cannot lower an indirect call!");
+
+  CallSite CS(CI);
+  switch (Callee->getIntrinsicID()) {
+  case Intrinsic::not_intrinsic:
+    report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+                      Callee->getName() + "'!");
+  default:
+    report_fatal_error("Code generator does not support intrinsic function '"+
+                      Callee->getName()+"'!");
+
+  // The setjmp/longjmp intrinsics should only exist in the code if it was
+  // never optimized (i.e., right out of the CFE), or if it has been hacked on
+  // by the lowerinvoke pass. In both cases, the right thing to do is to
+  // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: { + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getInt32Ty(Context)); + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(V); + break; + } + case Intrinsic::sigsetjmp: + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::longjmp: { + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + + case Intrinsic::siglongjmp: { + // Insert the call to abort + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + case Intrinsic::ctpop: + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::bswap: + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::ctlz: + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::cttz: { + // cttz(x) -> ctpop(~X & (X-1)) + Value *Src = CI->getArgOperand(0); + Value *NotSrc = Builder.CreateNot(Src); + NotSrc->setName(Src->getName() + ".not"); + Value *SrcM1 = ConstantInt::get(Src->getType(), 1); + SrcM1 = Builder.CreateSub(Src, SrcM1); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); + CI->replaceAllUsesWith(Src); + break; + } + + case Intrinsic::stacksave: + case Intrinsic::stackrestore: { + if (!Warned) + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; + Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + } + + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; + CI->replaceAllUsesWith(ConstantPointerNull::get( + cast<PointerType>(CI->getType()))); + break; + + case Intrinsic::prefetch: + break; // Simply strip out prefetches on unsupported architectures + + case Intrinsic::pcmarker: + break; // Simply strip out pcmarker on unsupported architectures + case Intrinsic::readcyclecounter: { + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); + break; + } + + case Intrinsic::dbg_declare: + break; // Simply strip out debugging intrinsics + + case Intrinsic::eh_exception: + case Intrinsic::eh_selector: + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::eh_typeid_for: + // Return something different to eh_selector. 
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::var_annotation: + break; // Strip out annotate intrinsic + + case Intrinsic::memcpy: { + const IntegerType *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memmove: { + const IntegerType *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memset: { + const IntegerType *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + // Extend the amount to i32. + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), + Type::getInt32Ty(Context), + /* isSigned */ false); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::sqrt: { + ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl"); + break; + } + case Intrinsic::log: { + ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl"); + break; + } + case Intrinsic::log2: { + ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l"); + break; + } + case Intrinsic::log10: { + ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l"); + break; + } + case Intrinsic::exp: { + ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl"); + break; + } + case Intrinsic::exp2: { + ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l"); + break; + } + case Intrinsic::pow: { + ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl"); + break; + } + case Intrinsic::flt_rounds: + // Lower to "round to the nearest" + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + break; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + break; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 index 0000000..3603802 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -0,0 +1,443 @@ +//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVMTargetMachine class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+namespace llvm {
+  bool EnableFastISel;
+}
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+    cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+    cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+    cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+    cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+    cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+    cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+    cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+    cl::Hidden,
+    cl::desc("Disable post-regalloc Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+    cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+    cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+    cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+    cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+    cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+    cl::desc("Dump garbage collector data"));
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+    cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+    cl::desc("Show instruction structure in .s output"));
+static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
+    cl::desc("Enable MC API logging"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+    cl::desc("Verify generated machine code"),
+    cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+           cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+  switch (AsmVerbose) {
+  default:
+  case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+  case cl::BOU_TRUE:  return true;
+  case cl::BOU_FALSE: return false;
+  }
+}
+
+// Enable or disable FastISel.
Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. +static cl::opt<cl::boolOrDefault> +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); + +// Enable or disable an experimental optimization to split GEPs +// and run a special GVN pass which does not examine loads, in +// an effort to factor out redundancy implicit in complex GEPs. +static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, + cl::desc("Split GEPs and run no-load GVN")); + +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + const std::string &Triple) + : TargetMachine(T), TargetTriple(Triple) { + AsmInfo = T.createAsmInfo(TargetTriple); +} + +// Set the default code model for the JIT for a generic target. +// FIXME: Is small right here? or .is64Bit() ? Large : Small? +void LLVMTargetMachine::setCodeModelForJIT() { + setCodeModel(CodeModel::Small); +} + +// Set the default code model for static compilation for a generic target. +void LLVMTargetMachine::setCodeModelForStatic() { + setCodeModel(CodeModel::Small); +} + +bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // Add common CodeGen passes. + MCContext *Context = 0; + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + return true; + assert(Context != 0 && "Failed to get MCContext"); + + const MCAsmInfo &MAI = *getMCAsmInfo(); + OwningPtr<MCStreamer> AsmStreamer; + + switch (FileType) { + default: return true; + case CGFT_AssemblyFile: { + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + + // Create a code emitter if asked to show the encoding. + MCCodeEmitter *MCE = 0; + if (ShowMCEncoding) + MCE = getTarget().createCodeEmitter(*this, *Context); + + AsmStreamer.reset(createAsmStreamer(*Context, Out, + getTargetData()->isLittleEndian(), + getVerboseAsm(), InstPrinter, + MCE, ShowMCInst)); + break; + } + case CGFT_ObjectFile: { + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); + TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + if (MCE == 0 || TAB == 0) + return true; + + AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, + *TAB, Out, MCE, + hasMCRelaxAll())); + break; + } + case CGFT_Null: + // The Null output is intended for use for performance analysis and testing, + // not real users. + AsmStreamer.reset(createNullStreamer(*Context)); + break; + } + + if (EnableMCLogging) + AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + + // Make sure the code model is set. + setCodeModelForStatic(); + PM.add(createGCInfoDeleter()); + return false; +} + +/// addPassesToEmitMachineCode - Add passes to the specified pass manager to +/// get machine code emitted. This uses a JITCodeEmitter object to handle +/// actually outputting the machine code and resolving things like the address +/// of functions. 
This method returns true if machine code emission is
/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+                                                   JITCodeEmitter &JCE,
+                                                   CodeGenOpt::Level OptLevel,
+                                                   bool DisableVerify) {
+  // Make sure the code model is set.
+  setCodeModelForJIT();
+
+  // Add common CodeGen passes.
+  MCContext *Ctx = 0;
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+    return true;
+
+  addCodeEmitter(PM, OptLevel, JCE);
+  PM.add(createGCInfoDeleter());
+
+  return false; // success!
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer which can be
+/// used to build custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+                                          MCContext *&Ctx,
+                                          CodeGenOpt::Level OptLevel,
+                                          bool DisableVerify) {
+  // Add common CodeGen passes.
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+    return true;
+  // Make sure the code model is set.
+  setCodeModelForJIT();
+
+  return false; // success!
+}
+
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
+static void printAndVerify(PassManagerBase &PM,
+                           const char *Banner) {
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+  if (VerifyMachineCode)
+    PM.add(createMachineVerifierPass());
+}
+
+/// addCommonCodeGenPasses - Add standard LLVM codegen passes used both for
+/// emitting assembly files and for machine code output.
+///
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+                                               CodeGenOpt::Level OptLevel,
+                                               bool DisableVerify,
+                                               MCContext *&OutContext) {
+  // Standard LLVM-Level Passes.
+
+  // Before running any passes, run the verifier to determine if the input
+  // coming from the front-end and/or optimizer is valid.
+  if (!DisableVerify)
+    PM.add(createVerifierPass());
+
+  // Optionally, run split-GEPs and no-load GVN.
+  if (EnableSplitGEPGVN) {
+    PM.add(createGEPSplitterPass());
+    PM.add(createGVNPass(/*NoLoads=*/true));
+  }
+
+  // Run loop strength reduction before anything else.
+  if (OptLevel != CodeGenOpt::None && !DisableLSR) {
+    PM.add(createLoopStrengthReducePass(getTargetLowering()));
+    if (PrintLSR)
+      PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+  }
+
+  PM.add(createGCLoweringPass());
+
+  // Make sure that no unreachable blocks are instruction selected.
+  PM.add(createUnreachableBlockEliminationPass());
+
+  // Turn exception handling constructs into something the code generators can
+  // handle.
+  switch (getMCAsmInfo()->getExceptionHandlingType()) {
+  case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both.
+    // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+    // catch info can get misplaced when a selector ends up more than one block
+    // removed from the parent invoke(s). This could happen when a landing
+    // pad is shared by multiple invokes and is also a target of a normal
+    // edge from elsewhere.
+    PM.add(createSjLjEHPass(getTargetLowering()));
+    // FALLTHROUGH
+  case ExceptionHandling::Dwarf:
+    PM.add(createDwarfEHPass(this));
+    break;
+  case ExceptionHandling::None:
+    PM.add(createLowerInvokePass(getTargetLowering()));
+
+    // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass()); + break; + } + + if (OptLevel != CodeGenOpt::None && !DisableCGP) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(PM, OptLevel); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Standard Lower-Level Passes. + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo()); + PM.add(MMI); + OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) + EnableFastISel = true; + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, "After Instruction Selection"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + + if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass"); + + PM.add(createPeepholeOptimizerPass()); + if (!DisableMachineLICM) + PM.add(createMachineLICMPass()); + PM.add(createMachineCSEPass()); + if (!DisableMachineSink) + PM.add(createMachineSinkingPass()); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + } + + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PreRegAlloc passes"); + + // Perform register allocation. + PM.add(createRegisterAllocator(OptLevel)); + printAndVerify(PM, "After Register Allocation"); + + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + if (!DisableSSC) + PM.add(createStackSlotColoringPass(false)); + + // Run post-ra machine LICM to hoist reloads / remats. + if (!DisablePostRAMachineLICM) + PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); + } + + // Run post-ra passes. 
+ if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PostRegAlloc passes"); + + PM.add(createLowerSubregsPass()); + printAndVerify(PM, "After LowerSubregs"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM, "After PrologEpilogCodeInserter"); + + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM, "After PreSched2 passes"); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None && !DisablePostRA) { + PM.add(createPostRAScheduler(OptLevel)); + printAndVerify(PM, "After PostRAScheduler"); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printNoVerify(PM, "After BranchFolding"); + } + + // Tail duplication. + if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { + PM.add(createTailDuplicatePass(false)); + printNoVerify(PM, "After TailDuplicate"); + } + + PM.add(createGCMachineCodeAnalysisPass()); + + if (PrintGCInfo) + PM.add(createGCInfoPrinter(dbgs())); + + if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { + PM.add(createCodePlacementOptPass()); + printNoVerify(PM, "After CodePlacementOpt"); + } + + if (addPreEmitPass(PM, OptLevel)) + printNoVerify(PM, "After PreEmit passes"); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp new file mode 100644 index 0000000..b9527fa --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -0,0 +1,138 @@ +//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LatencyPriorityQueue class, which is a +// SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. 
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} + +void LatencyPriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + + Queue.push_back(SU); +} + + +// ScheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); + } +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. 
+  push(OnlyAvailablePred);
+}
+
+SUnit *LatencyPriorityQueue::pop() {
+  if (empty()) return NULL;
+  std::vector<SUnit *>::iterator Best = Queue.begin();
+  for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+       E = Queue.end(); I != E; ++I)
+    if (Picker(*Best, *I))
+      Best = I;
+  SUnit *V = *Best;
+  if (Best != prior(Queue.end()))
+    std::swap(*Best, Queue.back());
+  Queue.pop_back();
+  return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+  assert(!Queue.empty() && "Queue is empty!");
+  std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+  if (I != prior(Queue.end()))
+    std::swap(*I, Queue.back());
+  Queue.pop_back();
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..59f380a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,886 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of the machine instructions, an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(SlotIndex I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  return r->contains(I);
+}
+
+// liveBeforeAndAt - Check if the interval is live at the index and the index
+// just before it. If the interval is live at the index, check if the index
+// starts a new live range. If it does, then check if the previous live range
+// ends at index-1.
+bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  if (!r->contains(I))
+    return false;
+  if (I != r->start)
+    return true;
+  // I is the start of a live range. Check if the previous live range ends
+  // at I-1.
+  if (r == ranges.begin())
+    return false;
+  return (--r)->end == I;
+}
+
+/// killedAt - Return true if a live range ends at index.
Note that the kill +/// point is not contained in the half-open live range. It is usually the +/// getDefIndex() slot following its last use. +bool LiveInterval::killedAt(SlotIndex I) const { + Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I); + + // Now r points to the first interval with start >= I, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= I. + // r->end is the kill point. + return r->end == I; +} + +/// killedInRange - Return true if the interval has kills in [Start,End). +bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { + Ranges::const_iterator r = + std::lower_bound(ranges.begin(), ranges.end(), End); + + // Now r points to the first interval with start >= End, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= End. + // r->end is the kill point. + return r->end >= Start && r->end < End; +} + +// overlaps - Return true if the intersection of the two live intervals is +// not empty. +// +// An example for overlaps(): +// +// 0: A = ... +// 4: B = ... +// 8: C = A + B ;; last use of A +// +// The live intervals should look like: +// +// A = [3, 11) +// B = [7, x) +// C = [11, y) +// +// A->overlaps(C) should return false since we want to be able to join +// A and C. +// +bool LiveInterval::overlapsFrom(const LiveInterval& other, + const_iterator StartPos) const { + assert(!empty() && "empty interval"); + const_iterator i = begin(); + const_iterator ie = end(); + const_iterator j = StartPos; + const_iterator je = other.end(); + + assert((StartPos->start <= i->start || StartPos == other.begin()) && + StartPos != other.end() && "Bogus start position hint!"); + + if (i->start < j->start) { + i = std::upper_bound(i, ie, j->start); + if (i != ranges.begin()) --i; + } else if (j->start < i->start) { + ++StartPos; + if (StartPos != other.end() && StartPos->start <= i->start) { + assert(StartPos < other.end() && i < end()); + j = std::upper_bound(j, je, i->start); + if (j != other.ranges.begin()) --j; + } + } else { + return true; + } + + if (j == je) return false; + + while (i != ie) { + if (i->start > j->start) { + std::swap(i, j); + std::swap(ie, je); + } + + if (i->end > j->start) + return true; + ++i; + } + + return false; +} + +/// overlaps - Return true if the live interval overlaps a range specified +/// by [Start, End). +bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { + assert(Start < End && "Invalid range"); + const_iterator I = std::lower_bound(begin(), end(), End); + return I != begin() && (--I)->end > Start; +} + + +/// ValNo is dead, remove it. If it is the largest value number, just nuke it +/// (and any other deleted values neighboring it), otherwise mark it as ~1U so +/// it can be nuked later. +void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { + if (ValNo->id == getNumValNums()-1) { + do { + valnos.pop_back(); + } while (!valnos.empty() && valnos.back()->isUnused()); + } else { + ValNo->setIsUnused(true); + } +} + +/// RenumberValues - Renumber all values in order of appearance and delete the +/// remaining unused values. 
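+/// For example (illustrative editorial note): if the surviving ranges
+/// reference val#'s in first-appearance order 2, 0, 3, then val# 2 is
+/// renumbered to 0, val# 0 to 1 and val# 3 to 2, while a val# 1 that no
+/// range references is dropped from the valnos list.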
+void LiveInterval::RenumberValues(LiveIntervals &lis) { + SmallPtrSet<VNInfo*, 8> Seen; + bool seenPHIDef = false; + valnos.clear(); + for (const_iterator I = begin(), E = end(); I != E; ++I) { + VNInfo *VNI = I->valno; + if (!Seen.insert(VNI)) + continue; + assert(!VNI->isUnused() && "Unused valno used by live range"); + VNI->id = (unsigned)valnos.size(); + valnos.push_back(VNI); + VNI->setHasPHIKill(false); + if (VNI->isPHIDef()) + seenPHIDef = true; + } + + // Recompute phi kill flags. + if (!seenPHIDef) + return; + for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (!VNI->isPHIDef()) + continue; + const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def); + assert(PHIBB && "No basic block for phi-def"); + for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(), + PE = PHIBB->pred_end(); PI != PE; ++PI) { + VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot()); + if (KVNI) + KVNI->setHasPHIKill(true); + } + } +} + +/// extendIntervalEndTo - This method is used when we want to extend the range +/// specified by I to end at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. The iterator is +/// not invalidated. +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = llvm::next(I); + for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + } + + // If NewEnd was in the middle of an interval, make sure to get its endpoint. + I->end = std::max(NewEnd, prior(MergeTo)->end); + + // Erase any dead ranges. + ranges.erase(llvm::next(I), MergeTo); + + // If the newly formed range now touches the range after it and if they have + // the same value number, merge the two ranges into one range. + Ranges::iterator Next = llvm::next(I); + if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { + I->end = Next->end; + ranges.erase(Next); + } +} + + +/// extendIntervalStartTo - This method is used when we want to extend the range +/// specified by I to start at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. +LiveInterval::Ranges::iterator +LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = I; + do { + if (MergeTo == ranges.begin()) { + I->start = NewStart; + ranges.erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another interval, just delete a range and + // extend that interval. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + MergeTo->end = I->end; + } else { + // Otherwise, extend the interval right after. 
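+ // Illustrative editorial example: asked to extend [8,12) to start at 6
+ // when the preceding range is [0,4), the scan above stops at [0,4),
+ // which does not reach 6, so we step forward and rewrite the following
+ // range in place as [6,12); the erase below then has nothing to remove.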
+ ++MergeTo; + MergeTo->start = NewStart; + MergeTo->end = I->end; + } + + ranges.erase(llvm::next(MergeTo), llvm::next(I)); + return MergeTo; +} + +LiveInterval::iterator +LiveInterval::addRangeFrom(LiveRange LR, iterator From) { + SlotIndex Start = LR.start, End = LR.end; + iterator it = std::upper_bound(From, ranges.end(), Start); + + // If the inserted interval starts in the middle or right at the end of + // another interval, just extend that interval to contain the range of LR. + if (it != ranges.begin()) { + iterator B = prior(it); + if (LR.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendIntervalEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two LiveRanges with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this range ends in the middle of, or right next to, another + // interval, merge it into that interval. + if (it != ranges.end()) { + if (LR.valno == it->valno) { + if (it->start <= End) { + it = extendIntervalStartTo(it, Start); + + // If LR is a complete superset of an interval, we may need to grow its + // endpoint as well. + if (End > it->end) + extendIntervalEndTo(it, End); + return it; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(it->start >= End && + "Cannot overlap two LiveRanges with differing ValID's"); + } + } + + // Otherwise, this is just a new range that doesn't interact with anything. + // Insert it. + return ranges.insert(it, LR); +} + +/// isInOneLiveRange - Return true if the range specified is entirely in +/// a single LiveRange of the live interval. +bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) { + Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); + if (I == ranges.begin()) + return false; + --I; + return I->containsRange(Start, End); +} + + +/// removeRange - Remove the specified range from this interval. Note that +/// the range must be in a single LiveRange in its entirety. +void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the LiveRange containing this span. + Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); + assert(I != ranges.begin() && "Range is not in interval!"); + --I; + assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); + + // If the span we are removing is at the start of the LiveRange, adjust it. + VNInfo *ValNo = I->valno; + if (I->start == Start) { + if (I->end == End) { + if (RemoveDeadValNo) { + // Check if val# is dead. + bool isDead = true; + for (const_iterator II = begin(), EE = end(); II != EE; ++II) + if (II != I && II->valno == ValNo) { + isDead = false; + break; + } + if (isDead) { + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); + } + } + + ranges.erase(I); // Removed the whole LiveRange. + } else + I->start = End; + return; + } + + // Otherwise if the span we are removing is at the end of the LiveRange, + // adjust the other way. + if (I->end == End) { + I->end = Start; + return; + } + + // Otherwise, we are splitting the LiveRange into two pieces. + SlotIndex OldEnd = I->end; + I->end = Start; // Trim the old interval. + + // Insert the new one. 
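+ // Illustrative editorial example: removing [4,6) from a single range
+ // [0,10) trimmed the old range to [0,4) above; the insert below adds
+ // [6,10) with the same value number, leaving a hole over [4,6).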
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+/// getLiveRangeContaining - Return the live range that contains the
+/// specified index, or null if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
+ const_iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != ranges.begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
+ iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+/// findDefinedVNInfo - Find the VNInfo defined by the specified
+/// index (register interval).
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->def == Idx)
+ return *i;
+ }
+
+ return 0;
+}
+
+/// findDefinedVNInfo - Find the VNInfo defined by the specified
+/// register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->getReg() == reg)
+ return *i;
+ }
+ return 0;
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ MustMapCurValNos = true;
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+ // Remember assignments because val# ids are changing.
+ SmallVector<unsigned, 16> OtherAssignments;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ unsigned RangeNo = 0;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+ // Map the valno in the other live range to the current live range.
+ I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+ assert(I->valno && "Adding a dead range?");
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ ComputeJoinedWeight(Other);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; it will replace the value numbers of the overlapped
+/// live ranges with the specified value number.
+void LiveInterval::MergeValueInAsValue(
+ const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+ SmallVector<VNInfo*, 4> ReplacedValNos;
+ iterator IP = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo");
+ if (I->valno != RHSValNo)
+ continue;
+ SlotIndex Start = I->start, End = I->end;
+ IP = std::upper_bound(IP, end(), Start);
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ if (IP[-1].valno != LHSValNo) {
+ ReplacedValNos.push_back(IP[-1].valno);
+ IP[-1].valno = LHSValNo; // Update val#.
+ }
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ if (IP->valno != LHSValNo) {
+ ReplacedValNos.push_back(IP->valno);
+ IP->valno = LHSValNo; // Update val#.
+ }
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) continue;
+ }
+
+ // Map the valno in the other live range to the current live range.
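+ // Illustrative editorial example: merging RHS range [10,20) when this
+ // interval already holds [15,25) under another val#: the code above
+ // retagged [15,25) with LHSValNo and trimmed End back to 15, so the
+ // call below inserts [10,15), which addRangeFrom coalesces into [10,25).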
+ IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP); + } + + + SmallSet<VNInfo*, 4> Seen; + for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) { + VNInfo *V1 = ReplacedValNos[i]; + if (Seen.insert(V1)) { + bool isDead = true; + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (I->valno == V1) { + isDead = false; + break; + } + if (isDead) { + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); + } + } + } +} + + +/// MergeInClobberRanges - For any live ranges that are not defined in the +/// current interval, but are defined in the Clobbers interval, mark them +/// used with an unknown definition value. +void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, + const LiveInterval &Clobbers, + VNInfo::Allocator &VNInfoAllocator) { + if (Clobbers.empty()) return; + + DenseMap<VNInfo*, VNInfo*> ValNoMaps; + VNInfo *UnusedValNo = 0; + iterator IP = begin(); + for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) { + // For every val# in the Clobbers interval, create a new "unknown" val#. + VNInfo *ClobberValNo = 0; + DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno); + if (VI != ValNoMaps.end()) + ClobberValNo = VI->second; + else if (UnusedValNo) + ClobberValNo = UnusedValNo; + else { + UnusedValNo = ClobberValNo = + getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); + ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); + } + + bool Done = false; + SlotIndex Start = I->start, End = I->end; + // If a clobber range starts before an existing range and ends after + // it, the clobber range will need to be split into multiple ranges. + // Loop until the entire clobber range is handled. + while (!Done) { + Done = true; + IP = std::upper_bound(IP, end(), Start); + SlotIndex SubRangeStart = Start; + SlotIndex SubRangeEnd = End; + + // If the start of this range overlaps with an existing liverange, trim it. + if (IP != begin() && IP[-1].end > SubRangeStart) { + SubRangeStart = IP[-1].end; + // Trimmed away the whole range? + if (SubRangeStart >= SubRangeEnd) continue; + } + // If the end of this range overlaps with an existing liverange, trim it. + if (IP != end() && SubRangeEnd > IP->start) { + // If the clobber live range extends beyond the existing live range, + // it'll need at least another live range, so set the flag to keep + // iterating. + if (SubRangeEnd > IP->end) { + Start = IP->end; + Done = false; + } + SubRangeEnd = IP->start; + // If this trimmed away the whole range, ignore it. + if (SubRangeStart == SubRangeEnd) continue; + } + + // Insert the clobber interval. + IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo), + IP); + UnusedValNo = 0; + } + } + + if (UnusedValNo) { + // Delete the last unused val#. + valnos.pop_back(); + } +} + +void LiveInterval::MergeInClobberRange(LiveIntervals &li_, + SlotIndex Start, + SlotIndex End, + VNInfo::Allocator &VNInfoAllocator) { + // Find a value # to use for the clobber ranges. If there is already a value# + // for unknown values, use it. + VNInfo *ClobberValNo = + getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); + + iterator IP = begin(); + IP = std::upper_bound(IP, end(), Start); + + // If the start of this range overlaps with an existing liverange, trim it. + if (IP != begin() && IP[-1].end > Start) { + Start = IP[-1].end; + // Trimmed away the whole range? + if (Start >= End) return; + } + // If the end of this range overlaps with an existing liverange, trim it. 
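+ // (Illustrative editorial example: a clobber range [5,15) meeting an
+ // existing range [10,20) is trimmed here to [5,10) before it is
+ // inserted below.)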
+ if (IP != end() && End > IP->start) {
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) return;
+ }
+
+ // Insert the clobber interval.
+ addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
+
+void LiveInterval::Copy(const LiveInterval &RHS,
+ MachineRegisterInfo *MRI,
+ VNInfo::Allocator &VNInfoAllocator) {
+ ranges.clear();
+ valnos.clear();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+ MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
+ weight = RHS.weight;
+ for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+ const VNInfo *VNI = RHS.getValNumInfo(i);
+ createValueCopy(VNI, VNInfoAllocator);
+ }
+ for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+ const LiveRange &LR = RHS.ranges[i];
+ addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+ }
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->start.distance(I->end);
+ return Sum;
+}
+
+/// ComputeJoinedWeight - Set the weight of a live interval Joined
+/// after Other has been merged into it.
+void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
+ // If either of these intervals was spilled, the weight is the
+ // weight of the non-spilled interval. This can only happen with
+ // iterative coalescers.
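+ //
+ // (Editorial note: HUGE_VALF is the sentinel weight this file gives to
+ // spilled intervals and to physical-register intervals, which is why
+ // the branches below test the weights against it rather than against
+ // an explicit "spilled" flag.)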
+
+ if (Other.weight != HUGE_VALF) {
+ weight += Other.weight;
+ }
+ else if (weight == HUGE_VALF &&
+ !TargetRegisterInfo::isPhysicalRegister(reg)) {
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining to spilled interval");
+ weight = Other.weight;
+ }
+ else {
+ // Otherwise the weight stays the same
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining from spilled interval");
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+
+void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ if (isStackSlot())
+ OS << "SS#" << getStackSlotIndex();
+ else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
+ OS << TRI->getName(reg);
+ else
+ OS << "%reg" << reg;
+
+ OS << ',' << weight;
+
+ if (empty())
+ OS << " EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->isUnused()) {
+ OS << "x";
+ } else {
+ if (!vni->isDefAccurate() && !vni->isPHIDef())
+ OS << "?";
+ else
+ OS << vni->def;
+ if (vni->hasPHIKill())
+ OS << "-phikill";
+ if (vni->hasRedefByEC())
+ OS << "-ec";
+ }
+ }
+ }
+}
+
+void LiveInterval::dump() const {
+ dbgs() << *this << "\n";
+}
+
+
+void LiveRange::print(raw_ostream &os) const {
+ os << *this;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..2726fc3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2008 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "liveintervals" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "VirtRegMap.h" +#include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ProcessImplicitDefs.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <limits> +#include <cmath> +using namespace llvm; + +// Hidden options for help debugging. +static cl::opt<bool> DisableReMat("disable-rematerialization", + cl::init(false), cl::Hidden); + +STATISTIC(numIntervals , "Number of original intervals"); +STATISTIC(numFolds , "Number of loads/stores folded into instructions"); +STATISTIC(numSplits , "Number of intervals split"); + +char LiveIntervals::ID = 0; +INITIALIZE_PASS(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false); + +void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<LiveVariables>(); + AU.addPreserved<LiveVariables>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + + if (!StrongPHIElim) { + AU.addPreservedID(PHIEliminationID); + AU.addRequiredID(PHIEliminationID); + } + + AU.addRequiredID(TwoAddressInstructionPassID); + AU.addPreserved<ProcessImplicitDefs>(); + AU.addRequired<ProcessImplicitDefs>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequiredTransitive<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void LiveIntervals::releaseMemory() { + // Free the live intervals themselves. + for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(), + E = r2iMap_.end(); I != E; ++I) + delete I->second; + + r2iMap_.clear(); + + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); + while (!CloneMIs.empty()) { + MachineInstr *MI = CloneMIs.back(); + CloneMIs.pop_back(); + mf_->DeleteMachineInstr(MI); + } +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &mf_->getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + aa_ = &getAnalysis<AliasAnalysis>(); + lv_ = &getAnalysis<LiveVariables>(); + indexes_ = &getAnalysis<SlotIndexes>(); + allocatableRegs_ = tri_->getAllocatableSet(fn); + + computeIntervals(); + + numIntervals += getNumIntervals(); + + DEBUG(dump()); + return true; +} + +/// print - Implement the dump method. 
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const { + OS << "********** INTERVALS **********\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) { + I->second->print(OS, tri_); + OS << "\n"; + } + + printInstrs(OS); +} + +void LiveIntervals::printInstrs(raw_ostream &OS) const { + OS << "********** MACHINEINSTRS **********\n"; + + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + OS << "BB#" << mbbi->getNumber() + << ":\t\t# derived from " << mbbi->getName() << "\n"; + for (MachineBasicBlock::iterator mii = mbbi->begin(), + mie = mbbi->end(); mii != mie; ++mii) { + if (mii->isDebugValue()) + OS << " \t" << *mii; + else + OS << getInstructionIndex(mii) << '\t' << *mii; + } + } +} + +void LiveIntervals::dumpInstrs() const { + printInstrs(dbgs()); +} + +bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, + VirtRegMap &vrm, unsigned reg) { + // We don't handle fancy stuff crossing basic block boundaries + if (li.ranges.size() != 1) + return true; + const LiveRange &range = li.ranges.front(); + SlotIndex idx = range.start.getBaseIndex(); + SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); + + // Skip deleted instructions + MachineInstr *firstMI = getInstructionFromIndex(idx); + while (!firstMI && idx != end) { + idx = idx.getNextIndex(); + firstMI = getInstructionFromIndex(idx); + } + if (!firstMI) + return false; + + // Find last instruction in range + SlotIndex lastIdx = end.getPrevIndex(); + MachineInstr *lastMI = getInstructionFromIndex(lastIdx); + while (!lastMI && lastIdx != idx) { + lastIdx = lastIdx.getPrevIndex(); + lastMI = getInstructionFromIndex(lastIdx); + } + if (!lastMI) + return false; + + // Range cannot cross basic block boundaries or terminators + MachineBasicBlock *MBB = firstMI->getParent(); + if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) + return true; + + MachineBasicBlock::const_iterator E = lastMI; + ++E; + for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { + const MachineInstr &MI = *I; + + // Allow copies to and from li.reg + if (MI.isCopy()) + if (MI.getOperand(0).getReg() == li.reg || + MI.getOperand(1).getReg() == li.reg) + continue; + + // Check for operands using reg + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand& mop = MI.getOperand(i); + if (!mop.isReg()) + continue; + unsigned PhysReg = mop.getReg(); + if (PhysReg == 0 || PhysReg == li.reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { + if (!vrm.hasPhys(PhysReg)) + continue; + PhysReg = vrm.getPhys(PhysReg); + } + if (PhysReg && tri_->regsOverlap(PhysReg, reg)) + return true; + } + } + + // No conflicts found. 
+ return false; +} + +bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, + SmallPtrSet<MachineInstr*,32> &JoinedCopies) { + for (LiveInterval::Ranges::const_iterator + I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { + for (SlotIndex index = I->start.getBaseIndex(), + end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); + index != end; + index = index.getNextIndex()) { + MachineInstr *MI = getInstructionFromIndex(index); + if (!MI) + continue; // skip deleted instructions + + if (JoinedCopies.count(MI)) + continue; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned PhysReg = MO.getReg(); + if (PhysReg == 0 || PhysReg == Reg || + TargetRegisterInfo::isVirtualRegister(PhysReg)) + continue; + if (tri_->regsOverlap(Reg, PhysReg)) + return true; + } + } + } + + return false; +} + +#ifndef NDEBUG +static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { + if (TargetRegisterInfo::isPhysicalRegister(reg)) + dbgs() << tri_->getName(reg); + else + dbgs() << "%reg" << reg; +} +#endif + +static +bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { + unsigned Reg = MI.getOperand(MOIdx).getReg(); + for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg()) + continue; + if (MO.getReg() == Reg && MO.isDef()) { + assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && + MI.getOperand(MOIdx).getSubReg() && + (MO.getSubReg() || MO.isImplicit())); + return true; + } + } + return false; +} + +/// isPartialRedef - Return true if the specified def at the specific index is +/// partially re-defining the specified live interval. A common case of this is +/// a definition of the sub-register. +bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, + LiveInterval &interval) { + if (!MO.getSubReg() || MO.isEarlyClobber()) + return false; + + SlotIndex RedefIndex = MIIdx.getDefIndex(); + const LiveRange *OldLR = + interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + if (OldLR->valno->isDefAccurate()) { + MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); + return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; + } + return false; +} + +void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, + MachineBasicBlock::iterator mi, + SlotIndex MIIdx, + MachineOperand& MO, + unsigned MOIdx, + LiveInterval &interval) { + DEBUG({ + dbgs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); + + // Virtual registers may be defined multiple times (due to phi + // elimination and 2-addr elimination). Much of what we do only has to be + // done once for the vreg. We use an empty interval to detect the first + // time we see a vreg. + LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); + if (interval.empty()) { + // Get the Idx of the defining instructions. + SlotIndex defIndex = MIIdx.getDefIndex(); + // Earlyclobbers move back one, so that they overlap the live range + // of inputs. + if (MO.isEarlyClobber()) + defIndex = MIIdx.getUseIndex(); + + // Make sure the first definition is not a partial redefinition. Add an + // <imp-def> of the full register. 
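+ // (Illustrative editorial example: a first def such as
+ // %reg1024:5<def> = ... writes only part of the vreg, so an
+ // <imp-def> of the full %reg1024 is added to mark the whole
+ // register as defined here.)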
+ if (MO.getSubReg())
+ mi->addRegisterDefined(interval.reg);
+
+ MachineInstr *CopyMI = NULL;
+ if (mi->isCopyLike()) {
+ CopyMI = mi;
+ }
+
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
+ VNInfoAllocator);
+ assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ SlotIndex killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ else
+ killIdx = defIndex.getStoreIndex();
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.empty() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << "\n");
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
+ DEBUG(dbgs() << " +" << NewLR);
+ interval.addRange(NewLR);
+
+ bool PHIJoin = lv_->isPHIJoin(interval.reg);
+
+ if (PHIJoin) {
+ // A phi join register is killed at the end of the MBB and revived as a new
+ // valno in the killing blocks.
+ assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
+ DEBUG(dbgs() << " phi-join");
+ ValNo->setHasPHIKill(true);
+ } else {
+ // Iterate over all of the blocks that the variable is completely
+ // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ SlotIndex Start = getMBBStartIdx(Kill->getParent());
+ SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+
+ // Create an interval with a NEW value number. Note that this value
+ // number isn't actually defined by an instruction, weird huh? :)
+ if (PHIJoin) {
+ ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
+ VNInfoAllocator);
+ ValNo->setIsPHIDef(true);
+ }
+ LiveRange LR(Start, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+
+ } else {
+ if (MultipleDefsBySameMI(*mi, MOIdx))
+ // Multiple defs of the same virtual register by the same instruction.
+ // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+ // This is likely due to elimination of REG_SEQUENCE instructions. Return
+ // here since there is nothing to do.
+ return;
+
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination.
If this is + // the result of two address elimination, then the vreg is one of the + // def-and-use register operand. + + // It may also be partial redef like this: + // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 + // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 + bool PartReDef = isPartialRedef(MIIdx, MO, interval); + if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { + // If this is a two-address definition, then we have already processed + // the live range. The only problem is that we didn't realize there + // are actually two values in the live interval. Because of this we + // need to take the LiveRegion that defines this register and split it + // into two values. + SlotIndex RedefIndex = MIIdx.getDefIndex(); + if (MO.isEarlyClobber()) + RedefIndex = MIIdx.getUseIndex(); + + const LiveRange *OldLR = + interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + VNInfo *OldValNo = OldLR->valno; + SlotIndex DefIndex = OldValNo->def.getDefIndex(); + + // Delete the previous value, which should be short and continuous, + // because the 2-addr copy must be in the same MBB as the redef. + interval.removeRange(DefIndex, RedefIndex); + + // The new value number (#1) is defined by the instruction we claimed + // defined value #0. + VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), + false, // update at * + VNInfoAllocator); + ValNo->setFlags(OldValNo->getFlags()); // * <- updating here + + // Value#0 is now defined by the 2-addr instruction. + OldValNo->def = RedefIndex; + OldValNo->setCopy(0); + + // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... + if (PartReDef && mi->isCopyLike()) + OldValNo->setCopy(&*mi); + + // Add the new live interval which replaces the range for the input copy. + LiveRange LR(DefIndex, RedefIndex, ValNo); + DEBUG(dbgs() << " replace range with " << LR); + interval.addRange(LR); + + // If this redefinition is dead, we need to add a dummy unit live + // range covering the def slot. + if (MO.isDead()) + interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(), + OldValNo)); + + DEBUG({ + dbgs() << " RESULT: "; + interval.print(dbgs(), tri_); + }); + } else if (lv_->isPHIJoin(interval.reg)) { + // In the case of PHI elimination, each variable definition is only + // live until the end of the block. We've already taken care of the + // rest of the live range. + + SlotIndex defIndex = MIIdx.getDefIndex(); + if (MO.isEarlyClobber()) + defIndex = MIIdx.getUseIndex(); + + VNInfo *ValNo; + MachineInstr *CopyMI = NULL; + if (mi->isCopyLike()) + CopyMI = mi; + ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); + + SlotIndex killIndex = getMBBEndIdx(mbb); + LiveRange LR(defIndex, killIndex, ValNo); + interval.addRange(LR); + ValNo->setHasPHIKill(true); + DEBUG(dbgs() << " phi-join +" << LR); + } else { + llvm_unreachable("Multiply defined register"); + } + } + + DEBUG(dbgs() << '\n'); +} + +void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator mi, + SlotIndex MIIdx, + MachineOperand& MO, + LiveInterval &interval, + MachineInstr *CopyMI) { + // A physical register cannot be live across basic block, so its + // lifetime must end somewhere in its defining basic block. + DEBUG({ + dbgs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); + + SlotIndex baseIndex = MIIdx; + SlotIndex start = baseIndex.getDefIndex(); + // Earlyclobbers move back one. 
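+ // (Editorial note: each instruction index provides load, use, def and
+ // store slots, in that order; starting an earlyclobber def at the use
+ // slot makes its range overlap the ranges of the instruction's inputs.)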
+ if (MO.isEarlyClobber())
+ start = MIIdx.getUseIndex();
+ SlotIndex end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ // For earlyclobbers, the defSlot was pushed back one; the extra
+ // advance below compensates.
+ if (MO.isDead()) {
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ baseIndex = baseIndex.getNextIndex();
+ while (++mi != MBB->end()) {
+
+ if (mi->isDebugValue())
+ continue;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ goto exit;
+ } else {
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
+ if (DefIdx != -1) {
+ if (mi->isRegTiedToUseOperand(DefIdx)) {
+ // Two-address instruction.
+ end = baseIndex.getDefIndex();
+ } else {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that
+ // defines it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ }
+ goto exit;
+ }
+ }
+
+ baseIndex = baseIndex.getNextIndex();
+ }
+
+ // The only case where we should have a dead physreg here without a kill
+ // or an instruction where we know it's dead is if it is live-in to the
+ // function and never used. Another possible case is that the implicit use
+ // of the physical register has been deleted by the two-address pass.
+ end = start.getStoreIndex();
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+ bool Extend = OldLR != interval.end();
+ VNInfo *ValNo = Extend
+ ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
+ if (MO.isEarlyClobber() && Extend)
+ ValNo->setHasRedefByEC(true);
+ LiveRange LR(start, end, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+ getOrCreateInterval(MO.getReg()));
+ else if (allocatableRegs_[MO.getReg()]) {
+ MachineInstr *CopyMI = NULL;
+ if (MI->isCopyLike())
+ CopyMI = MI;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(MO.getReg()), CopyMI);
+ // Def of a register also defines its sub-registers.
+ for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
+ // If MI also modifies the sub-register explicitly, avoid processing it
+ // more than once. Do not pass in TRI here so it checks for exact match.
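+ // (Illustrative editorial example: on x86 a def of EAX also defines
+ // AX, AH and AL; each such sub-register gets its own interval here
+ // unless the instruction explicitly defines it itself.)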
+ if (!MI->definesRegister(*AS)) + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + getOrCreateInterval(*AS), 0); + } +} + +void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, + SlotIndex MIIdx, + LiveInterval &interval, bool isAlias) { + DEBUG({ + dbgs() << "\t\tlivein register: "; + printRegName(interval.reg, tri_); + }); + + // Look for kills, if it reaches a def before it's killed, then it shouldn't + // be considered a livein. + MachineBasicBlock::iterator mi = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + // Skip over DBG_VALUE at the start of the MBB. + if (mi != E && mi->isDebugValue()) { + while (++mi != E && mi->isDebugValue()) + ; + if (mi == E) + // MBB is empty except for DBG_VALUE's. + return; + } + + SlotIndex baseIndex = MIIdx; + SlotIndex start = baseIndex; + if (getInstructionFromIndex(baseIndex) == 0) + baseIndex = indexes_->getNextNonNullIndex(baseIndex); + + SlotIndex end = baseIndex; + bool SeenDefUse = false; + + while (mi != E) { + if (mi->killsRegister(interval.reg, tri_)) { + DEBUG(dbgs() << " killed"); + end = baseIndex.getDefIndex(); + SeenDefUse = true; + break; + } else if (mi->definesRegister(interval.reg, tri_)) { + // Another instruction redefines the register before it is ever read. + // Then the register is essentially dead at the instruction that defines + // it. Hence its interval is: + // [defSlot(def), defSlot(def)+1) + DEBUG(dbgs() << " dead"); + end = start.getStoreIndex(); + SeenDefUse = true; + break; + } + + while (++mi != E && mi->isDebugValue()) + // Skip over DBG_VALUE. + ; + if (mi != E) + baseIndex = indexes_->getNextNonNullIndex(baseIndex); + } + + // Live-in register might not be used at all. + if (!SeenDefUse) { + if (isAlias) { + DEBUG(dbgs() << " dead"); + end = MIIdx.getStoreIndex(); + } else { + DEBUG(dbgs() << " live through"); + end = baseIndex; + } + } + + VNInfo *vni = + interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true), + 0, false, VNInfoAllocator); + vni->setIsPHIDef(true); + LiveRange LR(start, end, vni); + + interval.addRange(LR); + DEBUG(dbgs() << " +" << LR << '\n'); +} + +/// computeIntervals - computes the live intervals for virtual +/// registers. for some ordering of the machine instructions [1,N] a +/// live interval is an interval [i, j) where 1 <= i <= j < N for +/// which a variable is live +void LiveIntervals::computeIntervals() { + DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); + + SmallVector<unsigned, 8> UndefUses; + for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + if (MBB->empty()) + continue; + + // Track the index of the current machine instr. + SlotIndex MIIndex = getMBBStartIdx(MBB); + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << ":\t\t# derived from " << MBB->getName() << "\n"); + + // Create intervals for live-ins to this BB first. + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); LI != LE; ++LI) { + handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); + // Multiple live-ins can alias the same register. + for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) + if (!hasInterval(*AS)) + handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), + true); + } + + // Skip over empty initial indices. 
+ if (getInstructionFromIndex(MIIndex) == 0)
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+
+ for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+ MI != miEnd; ++MI) {
+ DEBUG(dbgs() << MIIndex << "\t" << *MI);
+ if (MI->isDebugValue())
+ continue;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ // handle register defs - build intervals
+ if (MO.isDef())
+ handleRegisterDef(MBB, MI, MIIndex, MO, i);
+ else if (MO.isUndef())
+ UndefUses.push_back(MO.getReg());
+ }
+
+ // Move to the next instr slot.
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ }
+ }
+
+ // Create empty intervals for registers defined by implicit_def's (except
+ // for those implicit_def that define values which are liveout of their
+ // blocks).
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
+ unsigned UndefReg = UndefUses[i];
+ (void)getOrCreateInterval(UndefReg);
+ }
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+ LiveInterval *NewLI = createInterval(li->reg);
+ NewLI->Copy(*li, mri_, getVNInfoAllocator());
+ return NewLI;
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+ MachineInstr *MI) const {
+ unsigned RegOp = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == li.reg)
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !allocatableRegs_[Reg])
+ continue;
+ // FIXME: For now, only remat MI with at most one register operand.
+ assert(!RegOp &&
+ "Can't rematerialize instruction with multiple register operand!");
+ RegOp = MO.getReg();
+#ifndef NDEBUG
+ break;
+#endif
+ }
+ return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+ SlotIndex UseIdx) const {
+ SlotIndex Index = getInstructionIndex(MI);
+ VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
+ LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
+ return UI != li.end() && UI->valno == ValNo;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
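+/// (Illustrative editorial example: a constant materialization such as
+/// "%reg1025<def> = MOV32ri 42" is a typical candidate, since recomputing
+/// it at a use point is cheaper than reloading it from a spill slot.)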
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ if (DisableReMat)
+ return false;
+
+ if (!tii_->isTriviallyReMaterializable(MI, aa_))
+ return false;
+
+ // Target-specific code can mark an instruction as being rematerializable
+ // if it has one virtual reg use, though it had better be something like
+ // a PIC base register which is likely to be live everywhere.
+ unsigned ImpUse = getReMatImplicitUse(li, MI);
+ if (ImpUse) {
+ const LiveInterval &ImpLi = getInterval(ImpUse);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
+ ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ SlotIndex UseIdx = getInstructionIndex(UseMI);
+ if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ continue;
+ if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+ return false;
+ }
+
+ // If a register operand of the re-materialized instruction is going to
+ // be spilled next, then it's not legal to re-materialize this instruction.
+ for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+ if (ImpUse == SpillIs[i]->reg)
+ return false;
+ }
+ return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI) {
+ SmallVector<LiveInterval*, 4> Dummy1;
+ bool Dummy2;
+ return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if every definition of MI of every
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ isLoad = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ if (!VNI->isDefAccurate())
+ return false;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ bool DefIsLoad = false;
+ if (!ReMatDefMI ||
+ !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+ return false;
+ isLoad |= DefIsLoad;
+ }
+ return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ unsigned &MRInfo,
+ SmallVector<unsigned, 2> &FoldOps) {
+ MRInfo = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned OpIdx = Ops[i];
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ // FIXME: fold subreg use.
+ if (MO.getSubReg())
+ return true;
+ if (MO.isDef())
+ MRInfo |= (unsigned)VirtRegMap::isMod;
+ else {
+ // Filter out two-address use operand(s).
+ if (MI->isRegTiedToDefOperand(OpIdx)) {
+ MRInfo = VirtRegMap::isModRef;
+ continue;
+ }
+ MRInfo |= (unsigned)VirtRegMap::isRef;
+ }
+ FoldOps.push_back(OpIdx);
+ }
+ return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into the ith operand of the
+/// specified MI. If it is successful, MI is updated with the newly created
+/// MI and true is returned.
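+/// (Illustrative editorial example: on x86 a reload feeding
+/// "%reg1026<def> = ADD32rr %reg1026, %reg1027" can be folded to
+/// "%reg1026<def> = ADD32rm %reg1026, <fi#2>", eliminating the separate
+/// load; fi#2 stands for a hypothetical frame index.)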
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+ VirtRegMap &vrm, MachineInstr *DefMI,
+ SlotIndex InstrIdx,
+ SmallVector<unsigned, 2> &Ops,
+ bool isSS, int Slot, unsigned Reg) {
+ // If it is an implicit def instruction, just delete it.
+ if (MI->isImplicitDef()) {
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++numFolds;
+ return true;
+ }
+
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // The only time it's safe to fold into a two address instruction is when
+ // it's folding reload and spill from / into a spill stack slot.
+ if (DefMI && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
+ if (fmi) {
+ // Remember this instruction uses the spill slot.
+ if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+ // Attempt to fold the memory reference into the instruction. If
+ // we can do this, we don't need to insert spill code.
+ if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+ vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+ vrm.transferSpillPts(MI, fmi);
+ vrm.transferRestorePts(MI, fmi);
+ vrm.transferEmergencySpills(MI, fmi);
+ ReplaceMachineInstrInMaps(MI, fmi);
+ MI->eraseFromParent();
+ MI = fmi;
+ ++numFolds;
+ return true;
+ }
+ return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ bool ReMat) const {
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // It's only legal to remat for a use, not a def.
+ if (ReMat && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+ LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+
+ MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb == 0)
+ return false;
+
+ for (++itr; itr != li.ranges.end(); ++itr) {
+ MachineBasicBlock *mbb2 =
+ indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb2 != mbb)
+ return false;
+ }
+
+ return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+ MachineInstr *MI, unsigned NewVReg,
+ VirtRegMap &vrm) {
+ // There is an implicit use. That means one of the other operands is
+ // being remat'ed and the remat'ed instruction has li.reg as a
+ // use operand. Make sure we rewrite that as well.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (!vrm.isReMaterialized(Reg)) + continue; + MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg); + MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg); + if (UseMO) + UseMO->setReg(NewVReg); + } +} + +/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions +/// for addIntervalsForSpills to rewrite uses / defs for the given live range. +bool LiveIntervals:: +rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, + bool TrySplit, SlotIndex index, SlotIndex end, + MachineInstr *MI, + MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, + unsigned Slot, int LdSlot, + bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, + VirtRegMap &vrm, + const TargetRegisterClass* rc, + SmallVector<int, 4> &ReMatIds, + const MachineLoopInfo *loopInfo, + unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, + DenseMap<unsigned,unsigned> &MBBVRegsMap, + std::vector<LiveInterval*> &NewLIs) { + bool CanFold = false; + RestartInstruction: + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& mop = MI->getOperand(i); + if (!mop.isReg()) + continue; + unsigned Reg = mop.getReg(); + if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (Reg != li.reg) + continue; + + bool TryFold = !DefIsReMat; + bool FoldSS = true; // Default behavior unless it's a remat. + int FoldSlot = Slot; + if (DefIsReMat) { + // If this is the rematerializable definition MI itself and + // all of its uses are rematerialized, simply delete it. + if (MI == ReMatOrigDefMI && CanDelete) { + DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " + << *MI << '\n'); + RemoveMachineInstrFromMaps(MI); + vrm.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + break; + } + + // If def for this use can't be rematerialized, then try folding. + // If def is rematerializable and it's a load, also try folding. + TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad)); + if (isLoad) { + // Try fold loads (from stack slot, constant pool, etc.) into uses. + FoldSS = isLoadSS; + FoldSlot = LdSlot; + } + } + + // Scan all of the operands of this instruction rewriting operands + // to use NewVReg instead of li.reg as appropriate. We do this for + // two reasons: + // + // 1. If the instr reads the same spilled vreg multiple times, we + // want to reuse the NewVReg. + // 2. If the instr is a two-addr instruction, we are required to + // keep the src/dst regs pinned. + // + // Keep track of whether we replace a use and/or def so that we can + // create the spill interval with the appropriate range. + SmallVector<unsigned, 2> Ops; + tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); + + // Create a new virtual register for the spill interval. + // Create the new register now so we can map the fold instruction + // to the new register so when it is unfolded we get the correct + // answer. + bool CreatedNewVReg = false; + if (NewVReg == 0) { + NewVReg = mri_->createVirtualRegister(rc); + vrm.grow(); + CreatedNewVReg = true; + + // The new virtual register should get the same allocation hints as the + // old one. 
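+      // (A hint is a (type, preferred-register) pair kept by
+      // MachineRegisterInfo; copying it preserves, for example, a preference
+      // for a specific physical register on the replacement vreg.)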
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg); + if (Hint.first || Hint.second) + mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); + } + + if (!TryFold) + CanFold = false; + else { + // Do not fold load / store here if we are splitting. We'll find an + // optimal point to insert a load / store later. + if (!TrySplit) { + if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, + Ops, FoldSS, FoldSlot, NewVReg)) { + // Folding the load/store can completely change the instruction in + // unpredictable ways, rescan it from the beginning. + + if (FoldSS) { + // We need to give the new vreg the same stack slot as the + // spilled interval. + vrm.assignVirt2StackSlot(NewVReg, FoldSlot); + } + + HasUse = false; + HasDef = false; + CanFold = false; + if (isNotInMIMap(MI)) + break; + goto RestartInstruction; + } + } else { + // We'll try to fold it later if it's profitable. + CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat); + } + } + + mop.setReg(NewVReg); + if (mop.isImplicit()) + rewriteImplicitOps(li, MI, NewVReg, vrm); + + // Reuse NewVReg for other reads. + for (unsigned j = 0, e = Ops.size(); j != e; ++j) { + MachineOperand &mopj = MI->getOperand(Ops[j]); + mopj.setReg(NewVReg); + if (mopj.isImplicit()) + rewriteImplicitOps(li, MI, NewVReg, vrm); + } + + if (CreatedNewVReg) { + if (DefIsReMat) { + vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); + if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { + // Each valnum may have its own remat id. + ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); + } else { + vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]); + } + if (!CanDelete || (HasUse && HasDef)) { + // If this is a two-addr instruction then its use operands are + // rematerializable but its def is not. It should be assigned a + // stack slot. + vrm.assignVirt2StackSlot(NewVReg, Slot); + } + } else { + vrm.assignVirt2StackSlot(NewVReg, Slot); + } + } else if (HasUse && HasDef && + vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) { + // If this interval hasn't been assigned a stack slot (because earlier + // def is a deleted remat def), do it now. + assert(Slot != VirtRegMap::NO_STACK_SLOT); + vrm.assignVirt2StackSlot(NewVReg, Slot); + } + + // Re-matting an instruction with virtual register use. Add the + // register as an implicit use on the use MI. + if (DefIsReMat && ImpUse) + MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); + + // Create a new register interval for this spill / remat. + LiveInterval &nI = getOrCreateInterval(NewVReg); + if (CreatedNewVReg) { + NewLIs.push_back(&nI); + MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg)); + if (TrySplit) + vrm.setIsSplitFromReg(NewVReg, li.reg); + } + + if (HasUse) { + if (CreatedNewVReg) { + LiveRange LR(index.getLoadIndex(), index.getDefIndex(), + nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); + DEBUG(dbgs() << " +" << LR); + nI.addRange(LR); + } else { + // Extend the split live interval to this def / use. 
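+        // e.g. (hypothetical indices) if nI's last range ends at 20 and this
+        // use sits at 24, a range [20, 24) is appended so the reused vreg
+        // stays live up to this instruction.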
+        SlotIndex End = index.getDefIndex();
+        LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+                     nI.getValNumInfo(nI.getNumValNums()-1));
+        DEBUG(dbgs() << " +" << LR);
+        nI.addRange(LR);
+      }
+    }
+    if (HasDef) {
+      LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
+                   nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+      DEBUG(dbgs() << " +" << LR);
+      nI.addRange(LR);
+    }
+
+    DEBUG({
+        dbgs() << "\t\t\t\tAdded new interval: ";
+        nI.print(dbgs(), tri_);
+        dbgs() << '\n';
+      });
+  }
+  return CanFold;
+}
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+                                         const VNInfo *VNI,
+                                         MachineBasicBlock *MBB,
+                                         SlotIndex Idx) const {
+  return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+  struct RewriteInfo {
+    SlotIndex Index;
+    MachineInstr *MI;
+    RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
+  };
+
+  struct RewriteInfoCompare {
+    bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+      return LHS.Index < RHS.Index;
+    }
+  };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+                    LiveInterval::Ranges::const_iterator &I,
+                    MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                    unsigned Slot, int LdSlot,
+                    bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                    VirtRegMap &vrm,
+                    const TargetRegisterClass* rc,
+                    SmallVector<int, 4> &ReMatIds,
+                    const MachineLoopInfo *loopInfo,
+                    BitVector &SpillMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+                    BitVector &RestoreMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+                    DenseMap<unsigned,unsigned> &MBBVRegsMap,
+                    std::vector<LiveInterval*> &NewLIs) {
+  bool AllCanFold = true;
+  unsigned NewVReg = 0;
+  SlotIndex start = I->start.getBaseIndex();
+  SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+
+  // First collect all the defs / uses in this live range that will be
+  // rewritten. Make sure they are sorted according to instruction index.
+  std::vector<RewriteInfo> RewriteMIs;
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineInstr *MI = &*ri;
+    MachineOperand &O = ri.getOperand();
+    ++ri;
+    if (MI->isDebugValue()) {
+      // Modify DBG_VALUE now that the value is in a spill slot.
+      if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
+        uint64_t Offset = MI->getOperand(1).getImm();
+        const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+        DebugLoc DL = MI->getDebugLoc();
+        int FI = isLoadSS ? LdSlot : (int)Slot;
+        if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
+                                                           Offset, MDPtr, DL)) {
+          DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+          ReplaceMachineInstrInMaps(MI, NewDV);
+          MachineBasicBlock *MBB = MI->getParent();
+          MBB->insert(MBB->erase(MI), NewDV);
+          continue;
+        }
+      }
+
+      DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+      continue;
+    }
+    assert(!(O.isImplicit() && O.isUse()) &&
+           "Spilling register that's used as implicit use?");
+    SlotIndex index = getInstructionIndex(MI);
+    if (index < start || index >= end)
+      continue;
+
+    if (O.isUndef())
+      // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons, e.g.:
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled: you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both end up in target registers that overlap.
+      continue;
+    RewriteMIs.push_back(RewriteInfo(index, MI));
+  }
+  std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+  unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+  // Now rewrite the defs and uses.
+  for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+    RewriteInfo &rwi = RewriteMIs[i];
+    ++i;
+    SlotIndex index = rwi.Index;
+    MachineInstr *MI = rwi.MI;
+    // If MI defs and/or uses the same register multiple times, then there
+    // are multiple entries.
+    while (i != e && RewriteMIs[i].MI == MI) {
+      assert(RewriteMIs[i].Index == index);
+      ++i;
+    }
+    MachineBasicBlock *MBB = MI->getParent();
+
+    if (ImpUse && MI != ReMatDefMI) {
+      // Re-matting an instruction with virtual register use. Prevent the
+      // interval from being spilled.
+      getInterval(ImpUse).markNotSpillable();
+    }
+
+    unsigned MBBId = MBB->getNumber();
+    unsigned ThisVReg = 0;
+    if (TrySplit) {
+      DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+      if (NVI != MBBVRegsMap.end()) {
+        ThisVReg = NVI->second;
+        // One common case:
+        // x = use
+        // ...
+        // ...
+        // def = ...
+        //     = use
+        // It's better to start a new interval to avoid artificially
+        // extending the new interval.
+        if (MI->readsWritesVirtualRegister(li.reg) ==
+            std::make_pair(false,true)) {
+          MBBVRegsMap.erase(MBB->getNumber());
+          ThisVReg = 0;
+        }
+      }
+    }
+
+    bool IsNew = ThisVReg == 0;
+    if (IsNew) {
+      // This ends the previous live interval. If all of its def / use
+      // can be folded, give it a low spill weight.
+      if (NewVReg && TrySplit && AllCanFold) {
+        LiveInterval &nI = getOrCreateInterval(NewVReg);
+        nI.weight /= 10.0F;
+      }
+      AllCanFold = true;
+    }
+    NewVReg = ThisVReg;
+
+    bool HasDef = false;
+    bool HasUse = false;
+    bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+                         index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+                         Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                         CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+                         ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+    if (!HasDef && !HasUse)
+      continue;
+
+    AllCanFold &= CanFold;
+
+    // Update weight of spill interval.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    if (!TrySplit) {
+      // The spill weight is now infinity as it cannot be spilled again.
+      nI.markNotSpillable();
+      continue;
+    }
+
+    // Keep track of the last def and first use in each MBB.
+    if (HasDef) {
+      if (MI != ReMatOrigDefMI || !CanDelete) {
+        bool HasKill = false;
+        if (!HasUse)
+          HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
+        else {
+          // If this is two-address code, then this index starts a new VNInfo.
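+          // e.g. (hypothetical) "%reg1024 = ADD %reg1024, 1" both reads and
+          // writes li.reg, so the def at this index begins a new value number
+          // whose kill must be looked up separately.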
+          const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
+          if (VNI)
+            HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
+        }
+        DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+          SpillIdxes.find(MBBId);
+        if (!HasKill) {
+          if (SII == SpillIdxes.end()) {
+            std::vector<SRInfo> S;
+            S.push_back(SRInfo(index, NewVReg, true));
+            SpillIdxes.insert(std::make_pair(MBBId, S));
+          } else if (SII->second.back().vreg != NewVReg) {
+            SII->second.push_back(SRInfo(index, NewVReg, true));
+          } else if (index > SII->second.back().index) {
+            // If there is an earlier def and this is a two-address
+            // instruction, then it's not possible to fold the store (which
+            // would also fold the load).
+            SRInfo &Info = SII->second.back();
+            Info.index = index;
+            Info.canFold = !HasUse;
+          }
+          SpillMBBs.set(MBBId);
+        } else if (SII != SpillIdxes.end() &&
+                   SII->second.back().vreg == NewVReg &&
+                   index > SII->second.back().index) {
+          // There is an earlier def that's not killed (must be two-address).
+          // The spill is no longer needed.
+          SII->second.pop_back();
+          if (SII->second.empty()) {
+            SpillIdxes.erase(MBBId);
+            SpillMBBs.reset(MBBId);
+          }
+        }
+      }
+    }
+
+    if (HasUse) {
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+        SpillIdxes.find(MBBId);
+      if (SII != SpillIdxes.end() &&
+          SII->second.back().vreg == NewVReg &&
+          index > SII->second.back().index)
+        // Use(s) follow the last def; it's not safe to fold the spill.
+        SII->second.back().canFold = false;
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+        RestoreIdxes.find(MBBId);
+      if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+        // If we are splitting live intervals, only fold if it's the first
+        // use and there isn't another use later in the MBB.
+        RII->second.back().canFold = false;
+      else if (IsNew) {
+        // Only need a reload if there isn't an earlier def / use.
+        if (RII == RestoreIdxes.end()) {
+          std::vector<SRInfo> Infos;
+          Infos.push_back(SRInfo(index, NewVReg, true));
+          RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+        } else {
+          RII->second.push_back(SRInfo(index, NewVReg, true));
+        }
+        RestoreMBBs.set(MBBId);
+      }
+    }
+
+    // Update spill weight.
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+  }
+
+  if (NewVReg && TrySplit && AllCanFold) {
+    // If all of its def / use can be folded, give it a low spill weight.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    nI.weight /= 10.0F;
+  }
+}
+
+bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
+                        unsigned vr, BitVector &RestoreMBBs,
+                        DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return false;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index &&
+        Restores[i].vreg == vr &&
+        Restores[i].canFold)
+      return true;
+  return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
+                        unsigned vr, BitVector &RestoreMBBs,
+                        DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index && Restores[i].vreg == vr)
+      Restores[i].index = SlotIndex();
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
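+/// For example (hypothetical vregs): given
+///   %reg1024<def> = IMPLICIT_DEF
+///   ...           = use %reg1024<undef>
+/// the IMPLICIT_DEF is deleted outright and each use is rewritten to a
+/// fresh vreg with an empty interval, since no real value ever needs to
+/// be reloaded for it.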
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+                                    const TargetRegisterClass* rc,
+                                    std::vector<LiveInterval*> &NewLIs) {
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineOperand &O = ri.getOperand();
+    MachineInstr *MI = &*ri;
+    ++ri;
+    if (MI->isDebugValue()) {
+      // Remove debug info for now.
+      O.setReg(0U);
+      DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+      continue;
+    }
+    if (O.isDef()) {
+      assert(MI->isImplicitDef() &&
+             "Register def was not rewritten?");
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+    } else {
+      // This must be a use of an implicit_def so it's not part of the live
+      // interval. Create a new empty live interval for it.
+      // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+      unsigned NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      vrm.setIsImplicitlyDefined(NewVReg);
+      NewLIs.push_back(&getOrCreateInterval(NewVReg));
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isReg() && MO.getReg() == li.reg) {
+          MO.setReg(NewVReg);
+          MO.setIsUndef();
+        }
+      }
+    }
+  }
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+  // Limit the loop depth ridiculousness.
+  if (loopDepth > 200)
+    loopDepth = 200;
+
+  // The loop depth is used to roughly estimate the number of times the
+  // instruction is executed. Something like 10^d is simple, but will quickly
+  // overflow a float. This expression behaves like 10^d for small d, but is
+  // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+  // headroom before overflow.
+  float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+
+  return (isDef + isUse) * lc;
+}
+
+void
+LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+  for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
+    normalizeSpillWeight(*NewLIs[i]);
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+                      SmallVectorImpl<LiveInterval*> &SpillIs,
+                      const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+  assert(li.isSpillable() && "attempt to spill already spilled interval!");
+
+  DEBUG({
+      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+      li.print(dbgs(), tri_);
+      dbgs() << '\n';
+    });
+
+  // Each bit specifies whether a spill is required in the corresponding MBB.
+  BitVector SpillMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+  BitVector RestoreMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+  DenseMap<unsigned,unsigned> MBBVRegsMap;
+  std::vector<LiveInterval*> NewLIs;
+  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+  unsigned NumValNums = li.getNumValNums();
+  SmallVector<MachineInstr*, 4> ReMatDefs;
+  ReMatDefs.resize(NumValNums, NULL);
+  SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+  ReMatOrigDefs.resize(NumValNums, NULL);
+  SmallVector<int, 4> ReMatIds;
+  ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+  BitVector ReMatDelete(NumValNums);
+  unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+  // Spilling a split live interval. It cannot be split any further. It's
+  // also guaranteed to be a single val# / range interval.
+  if (vrm.getPreSplitReg(li.reg)) {
+    vrm.setIsSplitFromReg(li.reg, 0);
+    // Unset the split kill marker on the last use.
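+    // (The spill means the former last use no longer ends the live range,
+    // so its recorded kill flag would be stale; clear it before rewriting.)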
+    SlotIndex KillIdx = vrm.getKillPoint(li.reg);
+    if (KillIdx != SlotIndex()) {
+      MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+      assert(KillMI && "Last use disappeared?");
+      int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+      assert(KillOp != -1 && "Last use disappeared?");
+      KillMI->getOperand(KillOp).setIsKill(false);
+    }
+    vrm.removeKillPoint(li.reg);
+    bool DefIsReMat = vrm.isReMaterialized(li.reg);
+    Slot = vrm.getStackSlot(li.reg);
+    assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+    MachineInstr *ReMatDefMI = DefIsReMat ?
+      vrm.getReMaterializedMI(li.reg) : NULL;
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+    bool IsFirstRange = true;
+    for (LiveInterval::Ranges::const_iterator
+           I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+      // If this is a split live interval with multiple ranges, it means there
+      // are two-address instructions that re-defined the value. Only the
+      // first def can be rematerialized!
+      if (IsFirstRange) {
+        // Note ReMatOrigDefMI has already been deleted.
+        rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+                             Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      } else {
+        rewriteInstructionsForSpills(li, false, I, NULL, 0,
+                             Slot, 0, false, false, false,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      }
+      IsFirstRange = false;
+    }
+
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    normalizeSpillWeights(NewLIs);
+    return NewLIs;
+  }
+
+  bool TrySplit = !intervalIsInOneMBB(li);
+  if (TrySplit)
+    ++numSplits;
+  bool NeedStackSlot = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    if (VNI->isUnused())
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    MachineInstr *ReMatDefMI = VNI->isDefAccurate()
+      ? getInstructionFromIndex(VNI->def) : 0;
+    bool dummy;
+    if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+      // Remember how to remat the def of this val#.
+      ReMatOrigDefs[VN] = ReMatDefMI;
+      // Original def may be modified so we have to make a copy here.
+      MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+      CloneMIs.push_back(Clone);
+      ReMatDefs[VN] = Clone;
+
+      bool CanDelete = true;
+      if (VNI->hasPHIKill()) {
+        // The kill is a phi node; not all of its uses can be rematerialized.
+        // It must not be deleted.
+        CanDelete = false;
+        // Need a stack slot if there is any live range where uses cannot be
+        // rematerialized.
+        NeedStackSlot = true;
+      }
+      if (CanDelete)
+        ReMatDelete.set(VN);
+    } else {
+      // Need a stack slot if there is any live range where uses cannot be
+      // rematerialized.
+      NeedStackSlot = true;
+    }
+  }
+
+  // One stack slot per live interval.
+  if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+    if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+      Slot = vrm.assignVirt2StackSlot(li.reg);
+
+    // This case only occurs when the prealloc splitter has already assigned
+    // a stack slot to this vreg.
+    else
+      Slot = vrm.getStackSlot(li.reg);
+  }
+
+  // Create new intervals and rewrite defs and uses.
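+  // Each live range is rewritten with the remat decision recorded for its
+  // value number above; e.g. a range whose val# was cloned from a load gets
+  // isLoad=true so that uses may fold the reload directly.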
+ for (LiveInterval::Ranges::const_iterator + I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { + MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id]; + MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id]; + bool DefIsReMat = ReMatDefMI != NULL; + bool CanDelete = ReMatDelete[I->valno->id]; + int LdSlot = 0; + bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); + bool isLoad = isLoadSS || + (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad()); + rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI, + Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, + CanDelete, vrm, rc, ReMatIds, loopInfo, + SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, + MBBVRegsMap, NewLIs); + } + + // Insert spills / restores if we are splitting. + if (!TrySplit) { + handleSpilledImpDefs(li, vrm, rc, NewLIs); + normalizeSpillWeights(NewLIs); + return NewLIs; + } + + SmallPtrSet<LiveInterval*, 4> AddedKill; + SmallVector<unsigned, 2> Ops; + if (NeedStackSlot) { + int Id = SpillMBBs.find_first(); + while (Id != -1) { + std::vector<SRInfo> &spills = SpillIdxes[Id]; + for (unsigned i = 0, e = spills.size(); i != e; ++i) { + SlotIndex index = spills[i].index; + unsigned VReg = spills[i].vreg; + LiveInterval &nI = getOrCreateInterval(VReg); + bool isReMat = vrm.isReMaterialized(VReg); + MachineInstr *MI = getInstructionFromIndex(index); + bool CanFold = false; + bool FoundUse = false; + Ops.clear(); + if (spills[i].canFold) { + CanFold = true; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg() || MO.getReg() != VReg) + continue; + + Ops.push_back(j); + if (MO.isDef()) + continue; + if (isReMat || + (!FoundUse && !alsoFoldARestore(Id, index, VReg, + RestoreMBBs, RestoreIdxes))) { + // MI has two-address uses of the same register. If the use + // isn't the first and only use in the BB, then we can't fold + // it. FIXME: Move this to rewriteInstructionsForSpills. + CanFold = false; + break; + } + FoundUse = true; + } + } + // Fold the store into the def if possible. + bool Folded = false; + if (CanFold && !Ops.empty()) { + if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){ + Folded = true; + if (FoundUse) { + // Also folded uses, do not issue a load. + eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); + nI.removeRange(index.getLoadIndex(), index.getDefIndex()); + } + nI.removeRange(index.getDefIndex(), index.getStoreIndex()); + } + } + + // Otherwise tell the spiller to issue a spill. + if (!Folded) { + LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; + bool isKill = LR->end == index.getStoreIndex(); + if (!MI->registerDefIsDead(nI.reg)) + // No need to spill a dead def. 
+ vrm.addSpillPoint(VReg, isKill, MI); + if (isKill) + AddedKill.insert(&nI); + } + } + Id = SpillMBBs.find_next(Id); + } + } + + int Id = RestoreMBBs.find_first(); + while (Id != -1) { + std::vector<SRInfo> &restores = RestoreIdxes[Id]; + for (unsigned i = 0, e = restores.size(); i != e; ++i) { + SlotIndex index = restores[i].index; + if (index == SlotIndex()) + continue; + unsigned VReg = restores[i].vreg; + LiveInterval &nI = getOrCreateInterval(VReg); + bool isReMat = vrm.isReMaterialized(VReg); + MachineInstr *MI = getInstructionFromIndex(index); + bool CanFold = false; + Ops.clear(); + if (restores[i].canFold) { + CanFold = true; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg() || MO.getReg() != VReg) + continue; + + if (MO.isDef()) { + // If this restore were to be folded, it would have been folded + // already. + CanFold = false; + break; + } + Ops.push_back(j); + } + } + + // Fold the load into the use if possible. + bool Folded = false; + if (CanFold && !Ops.empty()) { + if (!isReMat) + Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); + else { + MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); + int LdSlot = 0; + bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); + // If the rematerializable def is a load, also try to fold it. + if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) + Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, + Ops, isLoadSS, LdSlot, VReg); + if (!Folded) { + unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); + if (ImpUse) { + // Re-matting an instruction with virtual register use. Add the + // register as an implicit use on the use MI and mark the register + // interval as unspillable. + LiveInterval &ImpLi = getInterval(ImpUse); + ImpLi.markNotSpillable(); + MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); + } + } + } + } + // If folding is not possible / failed, then tell the spiller to issue a + // load / rematerialization for us. + if (Folded) + nI.removeRange(index.getLoadIndex(), index.getDefIndex()); + else + vrm.addRestorePoint(VReg, MI); + } + Id = RestoreMBBs.find_next(Id); + } + + // Finalize intervals: add kills, finalize spill weights, and filter out + // dead intervals. + std::vector<LiveInterval*> RetNewLIs; + for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { + LiveInterval *LI = NewLIs[i]; + if (!LI->empty()) { + if (!AddedKill.count(LI)) { + LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; + SlotIndex LastUseIdx = LR->end.getBaseIndex(); + MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); + int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); + assert(UseIdx != -1); + if (!LastUse->isRegTiedToDefOperand(UseIdx)) { + LastUse->getOperand(UseIdx).setIsKill(); + vrm.addKillPoint(LI->reg, LastUseIdx); + } + } + RetNewLIs.push_back(LI); + } + } + + handleSpilledImpDefs(li, vrm, rc, RetNewLIs); + normalizeSpillWeights(RetNewLIs); + return RetNewLIs; +} + +/// hasAllocatableSuperReg - Return true if the specified physical register has +/// any super register that's allocatable. +bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { + for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) + if (allocatableRegs_[*AS] && hasInterval(*AS)) + return true; + return false; +} + +/// getRepresentativeReg - Find the largest super register of the specified +/// physical register. 
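+/// e.g. (illustrative, x86-style) asking for AL may return EAX when EAX has
+/// a live interval and no allocatable super-register of its own.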
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+  // Find the largest super-register that is allocatable.
+  unsigned BestReg = Reg;
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+    unsigned SuperReg = *AS;
+    if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+      BestReg = SuperReg;
+      break;
+    }
+  }
+  return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+                                                   unsigned PhysReg) const {
+  unsigned NumConflicts = 0;
+  const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    if (MI->isDebugValue())
+      continue;
+    SlotIndex Index = getInstructionIndex(MI);
+    if (pli.liveAt(Index))
+      ++NumConflicts;
+  }
+  return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+                                            unsigned PhysReg, VirtRegMap &vrm) {
+  unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+  for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+    // If there are registers which alias PhysReg but are not sub-registers
+    // of the chosen representative super register, assert, since we can't
+    // handle that yet.
+    assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+           tri_->isSuperRegister(*AS, SpillReg));
+
+  bool Cut = false;
+  SmallVector<unsigned, 4> PRegs;
+  if (hasInterval(SpillReg))
+    PRegs.push_back(SpillReg);
+  else {
+    SmallSet<unsigned, 4> Added;
+    for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS)
+      if (Added.insert(*AS) && hasInterval(*AS)) {
+        PRegs.push_back(*AS);
+        for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS)
+          Added.insert(*ASS);
+      }
+  }
+
+  SmallPtrSet<MachineInstr*, 8> SeenMIs;
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    if (MI->isDebugValue() || SeenMIs.count(MI))
+      continue;
+    SeenMIs.insert(MI);
+    SlotIndex Index = getInstructionIndex(MI);
+    for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
+      unsigned PReg = PRegs[i];
+      LiveInterval &pli = getInterval(PReg);
+      if (!pli.liveAt(Index))
+        continue;
+      vrm.addEmergencySpill(PReg, MI);
+      SlotIndex StartIdx = Index.getLoadIndex();
+      SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
+      if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
+        pli.removeRange(StartIdx, EndIdx);
+        Cut = true;
+      } else {
+        std::string msg;
+        raw_string_ostream Msg(msg);
+        Msg << "Ran out of registers during register allocation!";
+        if (MI->isInlineAsm()) {
+          Msg << "\nPlease check your inline asm statement for invalid "
+              << "constraints:\n";
+          MI->print(Msg, tm_);
+        }
+        report_fatal_error(Msg.str());
+      }
+      for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
+        if (!hasInterval(*AS))
+          continue;
+        LiveInterval &spli = getInterval(*AS);
+        if (spli.liveAt(Index))
+          spli.removeRange(Index.getLoadIndex(),
+                           Index.getNextIndex().getBaseIndex());
+      }
+    }
+  }
+  return Cut;
+}
+
+LiveRange 
LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, + MachineInstr* startInst) { + LiveInterval& Interval = getOrCreateInterval(reg); + VNInfo* VN = Interval.getNextValue( + SlotIndex(getInstructionIndex(startInst).getDefIndex()), + startInst, true, getVNInfoAllocator()); + VN->setHasPHIKill(true); + LiveRange LR( + SlotIndex(getInstructionIndex(startInst).getDefIndex()), + getMBBEndIdx(startInst->getParent()), VN); + Interval.addRange(LR); + + return LR; +} + diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp new file mode 100644 index 0000000..b5c385f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -0,0 +1,64 @@ +//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the live stack slot analysis pass. It is analogous to +// live interval analysis except it's analyzing liveness of stack slots rather +// than registers. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "livestacks" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include <limits> +using namespace llvm; + +char LiveStacks::ID = 0; +INITIALIZE_PASS(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false); + +void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addPreserved<SlotIndexes>(); + AU.addRequiredTransitive<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void LiveStacks::releaseMemory() { + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); + S2IMap.clear(); + S2RCMap.clear(); +} + +bool LiveStacks::runOnMachineFunction(MachineFunction &) { + // FIXME: No analysis is being done right now. We are relying on the + // register allocators to provide the information. + return false; +} + +/// print - Implement the dump method. +void LiveStacks::print(raw_ostream &OS, const Module*) const { + + OS << "********** INTERVALS **********\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) { + I->second.print(OS); + int Slot = I->first; + const TargetRegisterClass *RC = getIntervalRegClass(Slot); + if (RC) + OS << " [" << RC->getName() << "]\n"; + else + OS << " [Unknown]\n"; + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp new file mode 100644 index 0000000..375307b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -0,0 +1,784 @@ +//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveVariable analysis pass. 
For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+INITIALIZE_PASS(LiveVariables, "livevars",
+                "Live Variable Analysis", false, false);
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredID(UnreachableMachineBlockElimID);
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+  for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+    if (Kills[i]->getParent() == MBB)
+      return Kills[i];
+  return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+  dbgs() << "  Alive in blocks: ";
+  for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+         E = AliveBlocks.end(); I != E; ++I)
+    dbgs() << *I << ", ";
+  dbgs() << "\n  Killed by:";
+  if (Kills.empty())
+    dbgs() << " No instructions.\n";
+  else {
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+      dbgs() << "\n    #" << i << ": " << *Kills[i];
+    dbgs() << "\n";
+  }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+  assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+         "getVarInfo: not a virtual register!");
+  RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
+  if (RegIdx >= VirtRegInfo.size()) {
+    if (RegIdx >= 2*VirtRegInfo.size())
+      VirtRegInfo.resize(RegIdx*2);
+    else
+      VirtRegInfo.resize(2*VirtRegInfo.size());
+  }
+  return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB,
+                                    std::vector<MachineBasicBlock*> &WorkList) {
+  unsigned BBNum = MBB->getNumber();
+
+  // Check to see if this basic block is one of the killing blocks. If so,
+  // remove it.
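+  // (The block is about to be marked live-through, so a kill recorded in it
+  // no longer ends the live range.)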
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    if (VRInfo.Kills[i]->getParent() == MBB) {
+      VRInfo.Kills.erase(VRInfo.Kills.begin()+i);  // Erase entry
+      break;
+    }
+
+  if (MBB == DefBlock) return;  // Terminate recursion
+
+  if (VRInfo.AliveBlocks.test(BBNum))
+    return;  // We already know the block is live
+
+  // Mark the variable known alive in this bb
+  VRInfo.AliveBlocks.set(BBNum);
+
+  for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+         E = MBB->pred_rend(); PI != E; ++PI)
+    WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB) {
+  std::vector<MachineBasicBlock*> WorkList;
+  MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *Pred = WorkList.back();
+    WorkList.pop_back();
+    MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+  }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+                                     MachineInstr *MI) {
+  assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+  unsigned BBNum = MBB->getNumber();
+
+  VarInfo& VRInfo = getVarInfo(reg);
+  VRInfo.NumUses++;
+
+  // Check to see if this basic block is already a kill block.
+  if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+    // Yes, this register is killed in this basic block already. Increase the
+    // live range by updating the kill instruction.
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  // This situation can occur:
+  //
+  //     ,------.
+  //     |      |
+  //     |      v
+  //     |   t2 = phi ... t1 ...
+  //     |      |
+  //     |      v
+  //     |   t1 = ...
+  //     |  ... = ... t1 ...
+  //     |      |
+  //     `------'
+  //
+  // where there is a use in a PHI node that's a predecessor to the defining
+  // block. We don't want to mark all predecessors as having the value "alive"
+  // in this case.
+  if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+  // Add a new kill entry for this basic block. If this virtual register is
+  // already marked as alive in this basic block, that means it is alive in at
+  // least one of the successor blocks, it's not a kill.
+  if (!VRInfo.AliveBlocks.test(BBNum))
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them as "known live".
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+  VarInfo &VRInfo = getVarInfo(Reg);
+
+  if (VRInfo.AliveBlocks.empty())
+    // If vr is not alive in any block, then it defaults to dead.
+    VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
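+/// e.g. (illustrative, x86-style) if the most recent def of any piece of
+/// EAX was "AX = ...", that def is returned and PartDefRegs receives AX
+/// plus its own pieces AL and AH.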
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+                                            SmallSet<unsigned,4> &PartDefRegs) {
+  unsigned LastDefReg = 0;
+  unsigned LastDefDist = 0;
+  MachineInstr *LastDef = NULL;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (!Def)
+      continue;
+    unsigned Dist = DistanceMap[Def];
+    if (Dist > LastDefDist) {
+      LastDefReg = SubReg;
+      LastDef = Def;
+      LastDefDist = Dist;
+    }
+  }
+
+  if (!LastDef)
+    return 0;
+
+  PartDefRegs.insert(LastDefReg);
+  for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = LastDef->getOperand(i);
+    if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+      continue;
+    unsigned DefReg = MO.getReg();
+    if (TRI->isSubRegister(Reg, DefReg)) {
+      PartDefRegs.insert(DefReg);
+      for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+           unsigned SubReg = *SubRegs; ++SubRegs)
+        PartDefRegs.insert(SubReg);
+    }
+  }
+  return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial defs into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  // If there was a previous use or a "full" def all is well.
+  if (!LastDef && !PhysRegUse[Reg]) {
+    // Otherwise, the last sub-register def implicitly defines this register.
+    // e.g.
+    // AH =
+    // AL = ... <imp-def EAX>, <imp-kill AH>
+    //    = AH
+    // ...
+    //    = EAX
+    // All of the sub-registers must have been defined before the use of Reg!
+    SmallSet<unsigned, 4> PartDefRegs;
+    MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+    // If LastPartialDef is NULL, it must be using a livein register.
+    if (LastPartialDef) {
+      LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+                                                           true/*IsImp*/));
+      PhysRegDef[Reg] = LastPartialDef;
+      SmallSet<unsigned, 8> Processed;
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           unsigned SubReg = *SubRegs; ++SubRegs) {
+        if (Processed.count(SubReg))
+          continue;
+        if (PartDefRegs.count(SubReg))
+          continue;
+        // This part of Reg was defined before the last partial def. It's
+        // killed here.
+        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+                                                             false/*IsDef*/,
+                                                             true/*IsImp*/));
+        PhysRegDef[SubReg] = LastPartialDef;
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Processed.insert(*SS);
+      }
+    }
+  }
+  else if (LastDef && !PhysRegUse[Reg] &&
+           !LastDef->findRegisterDefOperand(Reg))
+    // Last def defines the super register, add an implicit def of reg.
+    LastDef->addOperand(MachineOperand::CreateReg(Reg,
+                                                  true/*IsDef*/, true/*IsImp*/));
+
+  // Remember this use.
+  PhysRegUse[Reg] = MI;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs)
+    PhysRegUse[SubReg] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  MachineInstr *LastUse = PhysRegUse[Reg];
+  if (!LastDef && !LastUse)
+    return 0;
+
+  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  unsigned LastPartDefDist = 0;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist)
+        LastPartDefDist = Dist;
+    } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  MachineInstr *LastUse = PhysRegUse[Reg];
+  if (!LastDef && !LastUse)
+    return false;
+
+  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  // The whole register is used.
+  // AL =
+  // AH =
+  //
+  //    = AX
+  //    = AL, AX<imp-use, kill>
+  // AX =
+  //
+  // Or whole register is defined, but not used at all.
+  // AX<dead> =
+  // ...
+  // AX =
+  //
+  // Or whole register is defined, but only partly used.
+  // AX<dead> = AL<imp-def>
+  //    = AL<kill>
+  // AX =
+  MachineInstr *LastPartDef = 0;
+  unsigned LastPartDefDist = 0;
+  SmallSet<unsigned, 8> PartUses;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist) {
+        LastPartDefDist = Dist;
+        LastPartDef = Def;
+      }
+      continue;
+    }
+    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      PartUses.insert(SubReg);
+      for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+        PartUses.insert(*SS);
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  if (!PhysRegUse[Reg]) {
+    // Partial uses. Mark register def dead and add implicit def of
+    // sub-registers which are used.
+    // EAX<dead> = op AL<imp-def>
+    // That is, EAX def is dead but AL def extends past it.
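+    // The def of EAX therefore receives a <dead> marker, while each used
+    // sub-register gets its own implicit def and a kill at its last
+    // reference below.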
+    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      if (!PartUses.count(SubReg))
+        continue;
+      bool NeedDef = true;
+      if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+        MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+        if (MO) {
+          NeedDef = false;
+          assert(!MO->isDead());
+        }
+      }
+      if (NeedDef)
+        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+                                                 true/*IsDef*/, true/*IsImp*/));
+      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+      if (LastSubRef)
+        LastSubRef->addRegisterKilled(SubReg, TRI, true);
+      else {
+        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+        PhysRegUse[SubReg] = LastRefOrPartRef;
+        for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+             unsigned SSReg = *SSRegs; ++SSRegs)
+          PhysRegUse[SSReg] = LastRefOrPartRef;
+      }
+      for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+        PartUses.erase(*SS);
+    }
+  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+    if (LastPartDef)
+      // The last partial def kills the register.
+      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+                                                true/*IsImp*/, true/*IsKill*/));
+    else {
+      MachineOperand *MO =
+        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+      // If the last reference is the last def, then it's not used at all.
+      // That is, unless we are currently processing the last reference itself.
+      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+      if (NeedEC) {
+        // If we are adding a subreg def and the superreg def is marked early
+        // clobber, add an early clobber marker to the subreg def.
+        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+        if (MO)
+          MO->setIsEarlyClobber();
+      }
+    }
+  } else
+    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+  return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+                                     SmallVector<unsigned, 4> &Defs) {
+  // What parts of the register are previously defined?
+  SmallSet<unsigned, 32> Live;
+  if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+    Live.insert(Reg);
+    for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+      Live.insert(*SS);
+  } else {
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      // If a register isn't itself defined, but all of the parts that make it
+      // up are defined, then consider it also defined.
+      // e.g.
+      // AL =
+      // AH =
+      //    = AX
+      if (Live.count(SubReg))
+        continue;
+      if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+        Live.insert(SubReg);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Live.insert(*SS);
+      }
+    }
+  }
+
+  // Start from the largest piece, find the last time any part of the register
+  // is referenced.
+  HandlePhysRegKill(Reg, MI);
+  // Only some of the sub-registers are used.
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    if (!Live.count(SubReg))
+      // Skip if this sub-register isn't defined.
+      continue;
+    HandlePhysRegKill(SubReg, MI);
+  }
+
+  if (MI)
+    Defs.push_back(Reg);  // Remember this def.
+} + +void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, + SmallVector<unsigned, 4> &Defs) { + while (!Defs.empty()) { + unsigned Reg = Defs.back(); + Defs.pop_back(); + PhysRegDef[Reg] = MI; + PhysRegUse[Reg] = NULL; + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + PhysRegDef[SubReg] = MI; + PhysRegUse[SubReg] = NULL; + } + } +} + +bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MRI = &mf.getRegInfo(); + TRI = MF->getTarget().getRegisterInfo(); + + ReservedRegisters = TRI->getReservedRegs(mf); + + unsigned NumRegs = TRI->getNumRegs(); + PhysRegDef = new MachineInstr*[NumRegs]; + PhysRegUse = new MachineInstr*[NumRegs]; + PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; + std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); + std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + PHIJoins.clear(); + + /// Get some space for a respectable number of registers. + VirtRegInfo.resize(64); + + analyzePHINodes(mf); + + // Calculate live variable information in depth first order on the CFG of the + // function. This guarantees that we will see the definition of a virtual + // register before its uses due to dominance properties of SSA (except for PHI + // nodes, which are treated as a special case). + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + + // Mark live-in registers as live-in. + SmallVector<unsigned, 4> Defs; + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), + EE = MBB->livein_end(); II != EE; ++II) { + assert(TargetRegisterInfo::isPhysicalRegister(*II) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(*II, 0, Defs); + } + + // Loop over all of the instructions, processing them. + DistanceMap.clear(); + unsigned Dist = 0; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + DistanceMap.insert(std::make_pair(MI, Dist++)); + + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. + if (MI->isPHI()) + NumOperandsToProcess = 1; + + // Clear kill and dead markers. LV will recompute them. + SmallVector<unsigned, 4> UseRegs; + SmallVector<unsigned, 4> DefRegs; + for (unsigned i = 0; i != NumOperandsToProcess; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + MO.setIsKill(false); + UseRegs.push_back(MOReg); + } else /*MO.isDef()*/ { + MO.setIsDead(false); + DefRegs.push_back(MOReg); + } + } + + // Process all uses. + for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { + unsigned MOReg = UseRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegUse(MOReg, MBB, MI); + else if (!ReservedRegisters[MOReg]) + HandlePhysRegUse(MOReg, MI); + } + + // Process all defs. 
+      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+        unsigned MOReg = DefRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegDef(MOReg, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegDef(MOReg, MI, Defs);
+      }
+      UpdatePhysRegDefs(MI, Defs);
+    }
+
+    // Handle any virtual assignments from PHI nodes which might be at the
+    // bottom of this basic block. We check all of our successor blocks to see
+    // if they have PHI nodes, and if so, we simulate an assignment at the end
+    // of the current block.
+    if (!PHIVarInfo[MBB->getNumber()].empty()) {
+      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+             E = VarInfoVec.end(); I != E; ++I)
+        // Mark it alive only in the block we are representing.
+        MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+                                MBB);
+    }
+
+    // Finally, if the last instruction in the block is a return, make sure to
+    // mark it as using all of the live-out values in the function.
+    // Things marked both call and return are tail calls; do not do this for
+    // them. The tail callee need not take the same registers as input
+    // that it produces as output, and there are dependencies for its input
+    // registers elsewhere.
+    if (!MBB->empty() && MBB->back().getDesc().isReturn()
+        && !MBB->back().getDesc().isCall()) {
+      MachineInstr *Ret = &MBB->back();
+
+      for (MachineRegisterInfo::liveout_iterator
+           I = MF->getRegInfo().liveout_begin(),
+           E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+               "Cannot have a live-out virtual register!");
+        HandlePhysRegUse(*I, Ret);
+
+        // Add live-out registers as implicit uses.
+        if (!Ret->readsRegister(*I))
+          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+      }
+    }
+
+    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+    // available at the end of the basic block.
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (PhysRegDef[i] || PhysRegUse[i])
+        HandlePhysRegDef(i, 0, Defs);
+
+    std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+    std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+  }
+
+  // Convert and transfer the dead / killed information we have gathered into
+  // VirtRegInfo onto MIs.
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
+      if (VirtRegInfo[i].Kills[j] ==
+          MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
+        VirtRegInfo[i]
+          .Kills[j]->addRegisterDead(i +
+                                     TargetRegisterInfo::FirstVirtualRegister,
+                                     TRI);
+      else
+        VirtRegInfo[i]
+          .Kills[j]->addRegisterKilled(i +
+                                       TargetRegisterInfo::FirstVirtualRegister,
+                                       TRI);
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function. If so, it is due to a bug in the instruction selector or some
+  // other part of the code generator.
+#ifndef NDEBUG
+  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+  delete[] PhysRegDef;
+  delete[] PhysRegUse;
+  delete[] PHIVarInfo;
+
+  return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
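+/// e.g. (hypothetical) a pass that commutes "%reg1024 = ADD %reg1025,
+/// %reg1026<kill>" into a new instruction must transfer the recorded kill
+/// of %reg1026 from the old MI to the replacement.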
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+                                           MachineInstr *NewMI) {
+  VarInfo &VI = getVarInfo(Reg);
+  std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      MO.setIsKill(false);
+      unsigned Reg = MO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+        (void)removed; // Silence unused-variable warnings in NDEBUG builds.
+      }
+    }
+  }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+          .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+                                      unsigned Reg,
+                                      MachineRegisterInfo &MRI) {
+  unsigned Num = MBB.getNumber();
+
+  // Reg is live-through.
+  if (AliveBlocks.test(Num))
+    return true;
+
+  // Registers defined in MBB cannot be live in.
+  const MachineInstr *Def = MRI.getVRegDef(Reg);
+  if (Def && Def->getParent() == &MBB)
+    return false;
+
+  // Reg was not defined in MBB, was it killed here?
+  return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+  LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+  // Loop over all of the successors of the basic block, checking to see if
+  // the value is either live in the block, or if it is killed in the block.
+  std::vector<MachineBasicBlock*> OpSuccBlocks;
+  for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+         E = MBB.succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccMBB = *SI;
+
+    // Is it alive in this successor?
+    unsigned SuccIdx = SuccMBB->getNumber();
+    if (VI.AliveBlocks.test(SuccIdx))
+      return true;
+    OpSuccBlocks.push_back(SuccMBB);
+  }
+
+  // Check to see if this value is live because there is a use in a successor
+  // that kills it.
+  switch (OpSuccBlocks.size()) {
+  case 1: {
+    MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB)
+        return true;
+    break;
+  }
+  case 2: {
+    MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB1 ||
+          VI.Kills[i]->getParent() == SuccMBB2)
+        return true;
+    break;
+  }
+  default:
+    std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+                             VI.Kills[i]->getParent()))
+        return true;
+  }
+  return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
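+///
+/// For example (a sketch), after splitting the critical edge DomBB -> SuccBB
+/// with a new empty block BB, a caller is expected to invoke
+///   LV->addNewBlock(BB, DomBB, SuccBB);
+/// so every register live across the original edge is recorded as live
+/// through BB as well.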
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+                                MachineBasicBlock *DomBB,
+                                MachineBasicBlock *SuccBB) {
+  const unsigned NumNew = BB->getNumber();
+
+  // All registers used by PHI nodes in SuccBB must be live through BB.
+  for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+         BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+      if (BBI->getOperand(i+1).getMBB() == BB)
+        getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+
+  // Update info for all live variables.
+  for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
+         E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+    VarInfo &VI = getVarInfo(Reg);
+    if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+      VI.AliveBlocks.set(NumNew);
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 0000000..7e366f0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,354 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame index references replaced");
+
+namespace {
+  class FrameRef {
+    MachineBasicBlock::iterator MI; // Instr referencing the frame
+    int64_t LocalOffset;            // Local offset of the frame idx referenced
+  public:
+    FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+      MI(I), LocalOffset(Offset) {}
+    bool operator<(const FrameRef &RHS) const {
+      return LocalOffset < RHS.LocalOffset;
+    }
+    MachineBasicBlock::iterator getMachineInstr() { return MI; }
+  };
+
+  class LocalStackSlotPass: public MachineFunctionPass {
+    SmallVector<int64_t,16> LocalOffsets;
+
+    void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+                           bool StackGrowsDown, unsigned &MaxAlign);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    bool insertFrameReferenceRegisters(MachineFunction
&Fn); + public: + static char ID; // Pass identification, replacement for typeid + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } + bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const { + return "Local Stack Slot Allocation"; + } + + private: + }; +} // end anonymous namespace + +char LocalStackSlotPass::ID = 0; + +FunctionPass *llvm::createLocalStackSlotAllocationPass() { + return new LocalStackSlotPass(); +} + +bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + + // If the target doesn't want/need this pass, or if there are no locals + // to consider, early exit. + if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + return true; + + // Make sure we have enough space to store the local offsets. + LocalOffsets.resize(MFI->getObjectIndexEnd()); + + // Lay out the local blob. + calculateFrameObjectOffsets(MF); + + // Insert virtual base registers to resolve frame index references. + bool UsedBaseRegs = insertFrameReferenceRegisters(MF); + + // Tell MFI whether any base registers were allocated. PEI will only + // want to use the local block allocations from this pass if there were any. + // Otherwise, PEI can do a bit better job of getting the alignment right + // without a hole at the start since it knows the alignment of the stack + // at the start of local allocation, and this pass doesn't. + MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + + return true; +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, + int FrameIdx, int64_t &Offset, + bool StackGrowsDown, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; + DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " + << LocalOffset << "\n"); + // Keep the offset available for base register allocation + LocalOffsets[FrameIdx] = LocalOffset; + // And tell MFI about it for PEI to use later + MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + + if (!StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + ++NumAllocations; +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + int64_t Offset = 0; + unsigned MaxAlign = 0; + + // Make sure that the stack protector comes before the local variables on the + // stack. 
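+  // As an illustration of the policy below: the protector slot is assigned
+  // first, then the objects that may need protection, then the remaining
+  // locals, so the vulnerable objects stay adjacent to the protector. The
+  // exact offsets remain target- and alignment-dependent.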
+  SmallSet<int, 16> LargeStackObjs;
+  if (MFI->getStackProtectorIndex() >= 0) {
+    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+                      StackGrowsDown, MaxAlign);
+
+    // Assign large stack objects first.
+    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+      if (MFI->isDeadObjectIndex(i))
+        continue;
+      if (MFI->getStackProtectorIndex() == (int)i)
+        continue;
+      if (!MFI->MayNeedStackProtector(i))
+        continue;
+
+      AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+      LargeStackObjs.insert(i);
+    }
+  }
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    if (MFI->getStackProtectorIndex() == (int)i)
+      continue;
+    if (LargeStackObjs.count(i))
+      continue;
+
+    AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+  }
+
+  // Remember how big this blob of stack space is.
+  MFI->setLocalFrameSize(Offset);
+  MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+                       std::pair<unsigned, int64_t> &RegOffset,
+                       int64_t FrameSizeAdjust,
+                       int64_t LocalFrameOffset,
+                       const MachineInstr *MI,
+                       const TargetRegisterInfo *TRI) {
+  unsigned e = Regs.size();
+  for (unsigned i = 0; i < e; ++i) {
+    RegOffset = Regs[i];
+    // Check if the offset from where the base register points to the target
+    // address is in range for the instruction.
+    int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+    if (TRI->isFrameOffsetLegal(MI, Offset))
+      return true;
+  }
+  return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+  // Scan the function's instructions looking for frame index references.
+  // For each, ask the target if it wants a virtual base register for it
+  // based on what we can tell it about where the local will end up in the
+  // stack frame. If it wants one, re-use a suitable one we've previously
+  // allocated, or if there isn't one that fits the bill, allocate a new one
+  // and ask the target to create a defining instruction for it.
+  bool UsedBaseReg = false;
+
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+  bool StackGrowsDown =
+    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+  MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+
+  // Collect all of the instructions in the block that reference
+  // a frame index. Also store the frame index referenced to ease later
+  // lookup. (For any insn that has more than one FI reference, we arbitrarily
+  // choose the first one.)
+  SmallVector<FrameRef, 64> FrameReferenceInsns;
+  // A base register definition is a register+offset pair.
+  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+      MachineInstr *MI = I;
+      // Debug value instructions can't be out of range, so they don't need
+      // any updates.
+      if (MI->isDebugValue())
+        continue;
+      // For now, allocate the base register(s) within the basic block
+      // where they're used, and don't try to keep them around outside
+      // of that.
+      // It may be beneficial to try sharing them more broadly
+      // than that, but the increased register pressure makes that a
+      // tricky thing to balance. Investigate if re-materializing these
+      // becomes an issue.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        // Consider replacing all frame index operands that reference
+        // an object allocated in the local block.
+        if (MI->getOperand(i).isFI()) {
+          // Don't try this with values not in the local block.
+          if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+            break;
+          FrameReferenceInsns.
+            push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+          break;
+        }
+      }
+    }
+  }
+  // Sort the frame references by local offset.
+  array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+  // Loop through the frame references and allocate for them as necessary.
+  for (int ref = 0, e = FrameReferenceInsns.size(); ref < e; ++ref) {
+    MachineBasicBlock::iterator I =
+      FrameReferenceInsns[ref].getMachineInstr();
+    MachineInstr *MI = I;
+    for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+      // Consider replacing all frame index operands that reference
+      // an object allocated in the local block.
+      if (MI->getOperand(idx).isFI()) {
+        int FrameIdx = MI->getOperand(idx).getIndex();
+
+        assert(MFI->isObjectPreAllocated(FrameIdx) &&
+               "Only pre-allocated locals expected!");
+
+        DEBUG(dbgs() << "Considering: " << *MI);
+        if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+          unsigned BaseReg = 0;
+          int64_t Offset = 0;
+          int64_t FrameSizeAdjust =
+            StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+          DEBUG(dbgs() << "  Replacing FI in: " << *MI);
+
+          // If we have a suitable base register available, use it; otherwise
+          // create a new one. Note that any offset encoded in the
+          // instruction itself will be taken into account by the target,
+          // so we don't have to adjust for it here when reusing a base
+          // register.
+          std::pair<unsigned, int64_t> RegOffset;
+          if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+                                     FrameSizeAdjust,
+                                     LocalOffsets[FrameIdx],
+                                     MI, TRI)) {
+            DEBUG(dbgs() << "  Reusing base register " <<
+                  RegOffset.first << "\n");
+            // We found a register to reuse.
+            BaseReg = RegOffset.first;
+            Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+              RegOffset.second;
+          } else {
+            // No previously defined register was in range, so create a
+            // new one.
+            int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+            const TargetRegisterClass *RC = TRI->getPointerRegClass();
+            BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+            DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
+                  " at frame local offset " <<
+                  LocalOffsets[FrameIdx] + InstrOffset << "\n");
+            // Tell the target to insert the instruction to initialize
+            // the base register.
+            TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
+                                              FrameIdx, InstrOffset);
+
+            // The base register already includes any offset specified
+            // by the instruction, so account for that so it doesn't get
+            // applied twice.
+            Offset = -InstrOffset;
+
+            int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+              InstrOffset;
+            BaseRegisters.push_back(
+              std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+            ++NumBaseRegisters;
+            UsedBaseReg = true;
+          }
+          assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+          // Modify the instruction to use the new base register rather
+          // than the frame index operand.
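+          // An illustrative before/after (pseudo-operands; real encodings
+          // are target-specific):
+          //   LOAD %reg1024, <fi#3>, 12
+          // becomes, once a base register %reg1030 covering the local block
+          // is available:
+          //   LOAD %reg1024, %reg1030, Offset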
+ TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; + } + } + } + } + return UsedBaseReg; +} diff --git a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp new file mode 100644 index 0000000..ad1c537 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp @@ -0,0 +1,223 @@ +//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a MachineFunction pass which runs after register +// allocation that turns subreg insert/extract instructions into register +// copies, as needed. This ensures correct codegen even if the coalescer +// isn't able to remove all subreg instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersubregs" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + struct LowerSubregsInstructionPass : public MachineFunctionPass { + private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification, replacement for typeid + LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} + + const char *getPassName() const { + return "Subregister lowering instruction pass"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&); + + private: + bool LowerSubregToReg(MachineInstr *MI); + bool LowerCopy(MachineInstr *MI); + + void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo *TRI); + void TransferImplicitDefs(MachineInstr *MI); + }; + + char LowerSubregsInstructionPass::ID = 0; +} + +FunctionPass *llvm::createLowerSubregsPass() { + return new LowerSubregsInstructionPass(); +} + +/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the dead flag. +void +LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, + unsigned DstReg, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::iterator MII = + prior(MachineBasicBlock::iterator(MI)); ; --MII) { + if (MII->addRegisterDead(DstReg, TRI)) + break; + assert(MII != MI->getParent()->begin() && + "copyPhysReg output doesn't reference destination register!"); + } +} + +/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered +/// replacement instructions immediately precede it. Copy any implicit-def +/// operands from MI to the replacement instruction. 
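+///
+/// For example (illustrative operands), lowering
+///   %AX<def> = COPY %BX<kill>, %EAX<imp-def>
+/// must carry the %EAX<imp-def> over to the generated copy so liveness of
+/// the super-register is not lost.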
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+  MachineBasicBlock::iterator CopyMI = MI;
+  --CopyMI;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+      continue;
+    CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+  }
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+  MachineBasicBlock *MBB = MI->getParent();
+  assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+         MI->getOperand(1).isImm() &&
+         (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+         MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+  unsigned DstReg = MI->getOperand(0).getReg();
+  unsigned InsReg = MI->getOperand(2).getReg();
+  assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+  unsigned SubIdx = MI->getOperand(3).getImm();
+
+  assert(SubIdx != 0 && "Invalid index for insert_subreg");
+  unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+         "Insert destination must be in a physical register");
+  assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+         "Inserted value must be in a physical register");
+
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+  if (DstSubReg == InsReg) {
+    // No need to insert an identity copy instruction.
+    // Watch out for case like this:
+    // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+    // We must leave %RAX live.
+    if (DstReg != InsReg) {
+      MI->setDesc(TII->get(TargetOpcode::KILL));
+      MI->RemoveOperand(3);     // SubIdx
+      MI->RemoveOperand(1);     // Imm
+      DEBUG(dbgs() << "subreg: replace by: " << *MI);
+      return true;
+    }
+    DEBUG(dbgs() << "subreg: eliminated!");
+  } else {
+    TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+                     MI->getOperand(2).isKill());
+    // Transfer the kill/dead flags, if needed.
+    if (MI->getOperand(0).isDead())
+      TransferDeadFlag(MI, DstSubReg, TRI);
+    DEBUG({
+        MachineBasicBlock::iterator dMI = MI;
+        dbgs() << "subreg: " << *(--dMI);
+      });
+  }
+
+  DEBUG(dbgs() << '\n');
+  MBB->erase(MI);
+  return true;
+}
+
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+  MachineOperand &DstMO = MI->getOperand(0);
+  MachineOperand &SrcMO = MI->getOperand(1);
+
+  if (SrcMO.getReg() == DstMO.getReg()) {
+    DEBUG(dbgs() << "identity copy: " << *MI);
+    // No need to insert an identity copy instruction, but replace with a KILL
+    // if liveness is changed.
+    if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+      // We must make sure the super-register gets killed. Replace the
+      // instruction with KILL.
+      MI->setDesc(TII->get(TargetOpcode::KILL));
+      DEBUG(dbgs() << "replaced by: " << *MI);
+      return true;
+    }
+    // Vanilla identity copy.
+    MI->eraseFromParent();
+    return true;
+  }
+
+  DEBUG(dbgs() << "real copy: " << *MI);
+  TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+                   DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+  if (DstMO.isDead())
+    TransferDeadFlag(MI, DstMO.getReg(), TRI);
+  if (MI->getNumOperands() > 2)
+    TransferImplicitDefs(MI);
+  DEBUG({
+      MachineBasicBlock::iterator dMI = MI;
+      dbgs() << "replaced by: " << *(--dMI);
+    });
+  MI->eraseFromParent();
+  return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
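+///
+/// For example (a sketch using x86 registers), a surviving
+///   %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+/// is rewritten into a copy into the appropriate subregister of %RAX, or
+/// into a KILL when the inserted value already lives in that subregister.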
+/// +bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Machine Function\n" + << "********** LOWERING SUBREG INSTRS **********\n" + << "********** Function: " + << MF.getFunction()->getName() << '\n'); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + bool MadeChange = false; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me;) { + MachineBasicBlock::iterator nmi = llvm::next(mi); + MachineInstr *MI = mi; + assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear"); + assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG && + "EXTRACT_SUBREG should no longer appear"); + if (MI->isSubregToReg()) { + MadeChange |= LowerSubregToReg(MI); + } else if (MI->isCopy()) { + MadeChange |= LowerCopy(MI); + } + mi = nmi; + } + } + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp new file mode 100644 index 0000000..50f3f67 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -0,0 +1,651 @@ +//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect the sequence of machine instructions for a basic block. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) + : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), + AddressTaken(false) { + Insts.Parent = this; +} + +MachineBasicBlock::~MachineBasicBlock() { + LeakDetector::removeGarbageObject(this); +} + +/// getSymbol - Return the MCSymbol for this basic block. +/// +MCSymbol *MachineBasicBlock::getSymbol() const { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + "_" + + Twine(getNumber())); +} + + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { + MBB.print(OS); + return OS; +} + +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// parent pointer of the MBB, the MBB numbering, and any instructions in the +/// MBB to be on the right operand list for registers. +/// +/// MBBs start out as #-1. 
When a MBB is added to a MachineFunction, it +/// gets the next available unique MBB number. If it is removed from a +/// MachineFunction, it goes back to being #-1. +void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { + MachineFunction &MF = *N->getParent(); + N->Number = MF.addToMBBNumbering(N); + + // Make sure the instructions have their operands in the reginfo lists. + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + I->AddRegOperandsToUseLists(RegInfo); + + LeakDetector::removeGarbageObject(N); +} + +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { + N->getParent()->removeFromMBBNumbering(N->Number); + N->Number = -1; + LeakDetector::addGarbageObject(N); +} + + +/// addNodeToList (MI) - When we add an instruction to a basic block +/// list, we update its parent pointer and add its operands from reg use/def +/// lists if appropriate. +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { + assert(N->getParent() == 0 && "machine instruction already in a basic block"); + N->setParent(Parent); + + // Add the instruction's register operands to their corresponding + // use/def lists. + MachineFunction *MF = Parent->getParent(); + N->AddRegOperandsToUseLists(MF->getRegInfo()); + + LeakDetector::removeGarbageObject(N); +} + +/// removeNodeFromList (MI) - When we remove an instruction from a basic block +/// list, we update its parent pointer and remove its operands from reg use/def +/// lists if appropriate. +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { + assert(N->getParent() != 0 && "machine instruction not in a basic block"); + + // Remove from the use/def lists. + N->RemoveRegOperandsFromUseLists(); + + N->setParent(0); + + LeakDetector::addGarbageObject(N); +} + +/// transferNodesFromList (MI) - When moving a range of instructions from one +/// MBB list to another, we need to update the parent pointers and the use/def +/// lists. +void ilist_traits<MachineInstr>:: +transferNodesFromList(ilist_traits<MachineInstr> &fromList, + MachineBasicBlock::iterator first, + MachineBasicBlock::iterator last) { + assert(Parent->getParent() == fromList.Parent->getParent() && + "MachineInstr parent mismatch!"); + + // Splice within the same MBB -> no change. + if (Parent == fromList.Parent) return; + + // If splicing between two blocks within the same function, just update the + // parent pointers. 
+ for (; first != last; ++first) + first->setParent(Parent); +} + +void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { + assert(!MI->getParent() && "MI is still in a block!"); + Parent->getParent()->DeleteMachineInstr(MI); +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { + iterator I = begin(); + while (I != end() && I->isPHI()) + ++I; + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { + iterator I = end(); + while (I != begin() && (--I)->getDesc().isTerminator()) + ; /*noop */ + if (I != end() && !I->getDesc().isTerminator()) ++I; + return I; +} + +void MachineBasicBlock::dump() const { + print(dbgs()); +} + +static inline void OutputReg(raw_ostream &os, unsigned RegNo, + const TargetRegisterInfo *TRI = 0) { + if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) { + if (TRI) + os << " %" << TRI->get(RegNo).Name; + else + os << " %physreg" << RegNo; + } else + os << " %reg" << RegNo; +} + +StringRef MachineBasicBlock::getName() const { + if (const BasicBlock *LBB = getBasicBlock()) + return LBB->getName(); + else + return "(null)"; +} + +void MachineBasicBlock::print(raw_ostream &OS) const { + const MachineFunction *MF = getParent(); + if (!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + + if (Alignment) { OS << "Alignment " << Alignment << "\n"; } + + OS << "BB#" << getNumber() << ": "; + + const char *Comma = ""; + if (const BasicBlock *LBB = getBasicBlock()) { + OS << Comma << "derived from LLVM BB "; + WriteAsOperand(OS, LBB, /*PrintType=*/false); + Comma = ", "; + } + if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } + if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + OS << '\n'; + + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!livein_empty()) { + OS << " Live Ins:"; + for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + OutputReg(OS, *I, TRI); + OS << '\n'; + } + // Print the preds of this block according to the CFG. + if (!pred_empty()) { + OS << " Predecessors according to CFG:"; + for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) + OS << " BB#" << (*PI)->getNumber(); + OS << '\n'; + } + + for (const_iterator I = begin(); I != end(); ++I) { + OS << '\t'; + I->print(OS, &getParent()->getTarget()); + } + + // Print the successors of this block according to the CFG. + if (!succ_empty()) { + OS << " Successors according to CFG:"; + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + OS << " BB#" << (*SI)->getNumber(); + OS << '\n'; + } +} + +void MachineBasicBlock::removeLiveIn(unsigned Reg) { + std::vector<unsigned>::iterator I = + std::find(LiveIns.begin(), LiveIns.end(), Reg); + assert(I != LiveIns.end() && "Not a live in!"); + LiveIns.erase(I); +} + +bool MachineBasicBlock::isLiveIn(unsigned Reg) const { + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + return I != livein_end(); +} + +void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { + getParent()->splice(NewAfter, this); +} + +void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { + MachineFunction::iterator BBI = NewBefore; + getParent()->splice(++BBI, this); +} + +void MachineBasicBlock::updateTerminator() { + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + // A block with no successors has no concerns with fall-through edges. 
+ if (this->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere + bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. + if (isLayoutSuccessor(TBB)) + TII->RemoveBranch(*this); + } else { + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. + TBB = *succ_begin(); + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond, dl); + } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) + return; + TII->RemoveBranch(*this); + TII->InsertBranch(*this, FBB, 0, Cond, dl); + } else if (isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, 0, Cond, dl); + } + } else { + // The block has a fallthrough conditional branch. + MachineBasicBlock *MBBA = *succ_begin(); + MachineBasicBlock *MBBB = *llvm::next(succ_begin()); + if (MBBA == TBB) std::swap(MBBB, MBBA); + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) { + // We can't reverse the condition, add an unconditional branch. + Cond.clear(); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); + return; + } + TII->RemoveBranch(*this); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); + } else if (!isLayoutSuccessor(MBBA)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, MBBA, Cond, dl); + } + } + } +} + +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { + Successors.push_back(succ); + succ->addPredecessor(this); +} + +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { + succ->removePredecessor(this); + succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + assert(I != Successors.end() && "Not a current successor!"); + Successors.erase(I); +} + +MachineBasicBlock::succ_iterator +MachineBasicBlock::removeSuccessor(succ_iterator I) { + assert(I != Successors.end() && "Not a current successor!"); + (*I)->removePredecessor(this); + return Successors.erase(I); +} + +void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { + Predecessors.push_back(pred); +} + +void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { + std::vector<MachineBasicBlock *>::iterator I = + std::find(Predecessors.begin(), Predecessors.end(), pred); + assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); + Predecessors.erase(I); +} + +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { + if (this == fromMBB) + return; + + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + } +} + +void +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { + if (this == fromMBB) + return; + + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + + // Fix up any PHI nodes in the successor. 
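+    // PHI operands come in (value, predecessor) pairs, e.g. (registers are
+    // illustrative)
+    //   %reg1030<def> = PHI %reg1028, <BB#1>, %reg1029, <BB#2>
+    // so when fromMBB is BB#2, the loop below redirects that block operand
+    // to this block.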
+ for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); + MI != ME && MI->isPHI(); ++MI) + for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { + MachineOperand &MO = MI->getOperand(i); + if (MO.getMBB() == fromMBB) + MO.setMBB(this); + } + } +} + +bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { + std::vector<MachineBasicBlock *>::const_iterator I = + std::find(Successors.begin(), Successors.end(), MBB); + return I != Successors.end(); +} + +bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { + MachineFunction::const_iterator I(this); + return llvm::next(I) == MachineFunction::const_iterator(MBB); +} + +bool MachineBasicBlock::canFallThrough() { + MachineFunction::iterator Fallthrough = this; + ++Fallthrough; + // If FallthroughBlock is off the end of the function, it can't fall through. + if (Fallthrough == getParent()->end()) + return false; + + // If FallthroughBlock isn't a successor, no fallthrough is possible. + if (!isSuccessor(Fallthrough)) + return false; + + // Analyze the branches, if any, at the end of the block. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicable check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. This + // is over-conservative though, because if an instruction isn't actually + // predicated we could still treat it like a barrier. + return empty() || !back().getDesc().isBarrier() || + back().getDesc().isPredicable(); + } + + // If there is no branch, control always falls through. + if (TBB == 0) return true; + + // If there is some explicit branch to the fallthrough block, it can obviously + // reach, even though the branch should get folded to fall through implicitly. + if (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough) + return true; + + // If it's an unconditional branch to some block not the fall through, it + // doesn't fall through. + if (Cond.empty()) return false; + + // Otherwise, if it is conditional and has no explicit false block, it falls + // through. + return FBB == 0; +} + +MachineBasicBlock * +MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { + MachineFunction *MF = getParent(); + DebugLoc dl; // FIXME: this is nowhere + + // We may need to update this's terminator, but we can't do that if AnalyzeBranch + // fails. If this uses a jump table, we won't touch it. + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) + return NULL; + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + DEBUG(dbgs() << "Splitting critical edge:" + " BB#" << getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << Succ->getNumber() << '\n'); + + ReplaceUsesOfBlockWith(Succ, NMBB); + updateTerminator(); + + // Insert unconditional "jump Succ" instruction in NMBB if necessary. 
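+  // At this point the original edge
+  //   this --> Succ
+  // has become
+  //   this --> NMBB --> Succ
+  // and NMBB only needs an explicit branch if Succ is not its layout
+  // successor.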
+  NMBB->addSuccessor(Succ);
+  if (!NMBB->isLayoutSuccessor(Succ)) {
+    Cond.clear();
+    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+  }
+
+  // Fix PHI nodes in Succ so they refer to NMBB instead of this.
+  for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+       i != e && i->isPHI(); ++i)
+    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+      if (i->getOperand(ni+1).getMBB() == this)
+        i->getOperand(ni+1).setMBB(NMBB);
+
+  if (LiveVariables *LV =
+        P->getAnalysisIfAvailable<LiveVariables>())
+    LV->addNewBlock(NMBB, this, Succ);
+
+  if (MachineDominatorTree *MDT =
+        P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+    // Update dominator information.
+    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);
+
+    bool IsNewIDom = true;
+    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+         PI != E; ++PI) {
+      MachineBasicBlock *PredBB = *PI;
+      if (PredBB == NMBB)
+        continue;
+      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
+        IsNewIDom = false;
+        break;
+      }
+    }
+
+    // We know "this" dominates the newly created basic block.
+    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+    // If all the other predecessors of "Succ" are dominated by "Succ" itself
+    // then the new block is the new immediate dominator of "Succ". Otherwise,
+    // the new block doesn't dominate anything.
+    if (IsNewIDom)
+      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
+  }
+
+  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+      // If one or the other blocks were not in a loop, the new block is not
+      // either, and thus LI doesn't need to be updated.
+      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+        if (TIL == DestLoop) {
+          // Both are in the same loop; NMBB joins that loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (TIL->contains(DestLoop)) {
+          // Edge from an outer loop to an inner loop. Add to the outer loop.
+          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (DestLoop->contains(TIL)) {
+          // Edge from an inner loop to an outer loop. Add to the outer loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else {
+          // Edge from two loops with no containment relation. Because these
+          // are natural loops, we know that the destination block must be the
+          // header of its loop (adding a branch into a loop elsewhere would
+          // create an irreducible loop).
+          assert(DestLoop->getHeader() == Succ &&
+                 "Should not create irreducible loops!");
+          if (MachineLoop *ParentLoop = DestLoop->getParentLoop())
+            ParentLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        }
+      }
+    }
+
+  return NMBB;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->remove(this);
+  return this;
+}
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->erase(this);
+}
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
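+///
+/// For example, if this block ends in (an illustrative terminator)
+///   JNE <BB#4>
+/// and Old is BB#4, the operand is rewritten to point at New and the
+/// successor list is updated to match.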
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+                                               MachineBasicBlock *New) {
+  assert(Old != New && "Cannot replace self with self!");
+
+  MachineBasicBlock::iterator I = end();
+  while (I != begin()) {
+    --I;
+    if (!I->getDesc().isTerminator()) break;
+
+    // Scan the operands of this machine instruction, replacing any uses of Old
+    // with New.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      if (I->getOperand(i).isMBB() &&
+          I->getOperand(i).getMBB() == Old)
+        I->getOperand(i).setMBB(New);
+  }
+
+  // Update the successor information.
+  removeSuccessor(Old);
+  addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+                                             MachineBasicBlock *DestB,
+                                             bool isCond) {
+  // The values of DestA and DestB frequently come from a call to the
+  // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the
+  // initial values from there.
+  //
+  // 1. If both DestA and DestB are null, then the block ends with no branches
+  //    (it falls through to its successor).
+  // 2. If DestA is set, DestB is null, and isCond is false, then the block
+  //    ends with only an unconditional branch.
+  // 3. If DestA is set, DestB is null, and isCond is true, then the block
+  //    ends with a conditional branch that falls through to a successor (the
+  //    fall-through block takes the place of DestB).
+  // 4. If DestA and DestB are set and isCond is true, then the block ends
+  //    with a conditional branch followed by an unconditional branch. DestA
+  //    is the 'true' destination and DestB is the 'false' destination.
+
+  bool Changed = false;
+
+  MachineFunction::iterator FallThru =
+    llvm::next(MachineFunction::iterator(this));
+
+  if (DestA == 0 && DestB == 0) {
+    // Block falls through to successor.
+    DestA = FallThru;
+    DestB = FallThru;
+  } else if (DestA != 0 && DestB == 0) {
+    if (isCond)
+      // Block ends in conditional jump that falls through to successor.
+      DestB = FallThru;
+  } else {
+    assert(DestA && DestB && isCond &&
+           "CFG in a bad state. Cannot correct CFG edges");
+  }
+
+  // Remove superfluous edges. I.e., those which aren't destinations of this
+  // basic block, duplicate edges, or landing pads.
+  SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+  MachineBasicBlock::succ_iterator SI = succ_begin();
+  while (SI != succ_end()) {
+    const MachineBasicBlock *MBB = *SI;
+    if (!SeenMBBs.insert(MBB) ||
+        (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+      // This is a superfluous edge, remove it.
+      SI = removeSuccessor(SI);
+      Changed = true;
+    } else {
+      ++SI;
+    }
+  }
+
+  return Changed;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+  DebugLoc DL;
+  MachineBasicBlock::iterator E = end();
+  if (MBBI != E) {
+    // Skip debug declarations; we don't want a DebugLoc from them.
+    MachineBasicBlock::iterator MBBI2 = MBBI;
+    while (MBBI2 != E && MBBI2->isDebugValue())
+      ++MBBI2;
+    if (MBBI2 != E)
+      DL = MBBI2->getDebugLoc();
+  }
+  return DL;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+                          bool t) {
+  OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 0000000..92e2299
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,508 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
+STATISTIC(NumPhysCSEs, "Number of physreg defining common subexprs eliminated");
+
+namespace {
+  class MachineCSE : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    AliasAnalysis *AA;
+    MachineDominatorTree *DT;
+    MachineRegisterInfo *MRI;
+  public:
+    static char ID; // Pass identification
+    MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+    }
+
+  private:
+    const unsigned LookAheadLimit;
+    typedef ScopedHashTableScope<MachineInstr*, unsigned,
+                                 MachineInstrExpressionTrait> ScopeType;
+    DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+    ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+    SmallVector<MachineInstr*, 64> Exps;
+    unsigned CurrVN;
+
+    bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool isPhysDefTriviallyDead(unsigned Reg,
+                                MachineBasicBlock::const_iterator I,
+                                MachineBasicBlock::const_iterator E) const;
+    bool hasLivePhysRegDefUse(const MachineInstr *MI,
+                              const MachineBasicBlock *MBB,
+                              unsigned &PhysDef) const;
+    bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
+                           unsigned PhysDef) const;
+    bool isCSECandidate(MachineInstr *MI);
+    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+                           MachineInstr *CSMI, MachineInstr *MI);
+    void EnterScope(MachineBasicBlock *MBB);
+    void ExitScope(MachineBasicBlock *MBB);
+    bool ProcessBlock(MachineBasicBlock *MBB);
+    void ExitScopeIfDone(MachineDomTreeNode *Node,
+                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                         DenseMap<MachineDomTreeNode*,
+                                  MachineDomTreeNode*> &ParentMap);
+    bool PerformCSE(MachineDomTreeNode *Node);
+  };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+INITIALIZE_PASS(MachineCSE, "machine-cse",
+                "Machine Common Subexpression Elimination", false, false);
+
+FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
+
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+                                          MachineBasicBlock *MBB) {
+  bool Changed = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (!MRI->hasOneUse(Reg))
+      // Only coalesce single use copies. This ensures the copy will be
+      // deleted.
+      continue;
+    MachineInstr *DefMI = MRI->getVRegDef(Reg);
+    if (DefMI->getParent() != MBB)
+      continue;
+    if (!DefMI->isCopy())
+      continue;
+    unsigned SrcReg = DefMI->getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+      continue;
+    if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+      continue;
+    const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
+    if (!NewRC)
+      continue;
+    DEBUG(dbgs() << "Coalescing: " << *DefMI);
+    DEBUG(dbgs() << "*** to: " << *MI);
+    MO.setReg(SrcReg);
+    MRI->clearKillFlags(SrcReg);
+    if (NewRC != SRC)
+      MRI->setRegClass(SrcReg, NewRC);
+    DefMI->eraseFromParent();
+    ++NumCoalesces;
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+                                   MachineBasicBlock::const_iterator I,
+                                   MachineBasicBlock::const_iterator E) const {
+  unsigned LookAheadLeft = LookAheadLimit;
+  while (LookAheadLeft) {
+    // Skip over dbg_value's.
+    while (I != E && I->isDebugValue())
+      ++I;
+
+    if (I == E)
+      // Reached end of block, register is obviously dead.
+      return true;
+
+    bool SeenDef = false;
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (!TRI->regsOverlap(MO.getReg(), Reg))
+        continue;
+      if (MO.isUse())
+        // Found a use!
+        return false;
+      SeenDef = true;
+    }
+    if (SeenDef)
+      // Saw a def of Reg (or an alias) before encountering any use; it's
+      // trivially dead.
+      return true;
+
+    --LookAheadLeft;
+    ++I;
+  }
+  return false;
+}
+
+/// hasLivePhysRegDefUse - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers). It
+/// also returns the physical register def by reference if it's the only one
+/// and the instruction does not use a physical register.
+bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
+                                      const MachineBasicBlock *MBB,
+                                      unsigned &PhysDef) const {
+  PhysDef = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (MO.isUse()) {
+      // Can't touch anything to read a physical register.
+      PhysDef = 0;
+      return true;
+    }
+    if (MO.isDead())
+      // If the def is dead, it's ok.
+      continue;
+    // Ok, this is a physical register def that's not marked "dead". That's
+    // common since this pass is run before livevariables. We can scan
+    // forward a few instructions and check if it is obviously dead.
+    if (PhysDef) {
+      // Multiple physical register defs. These are rare, forget about it.
+      PhysDef = 0;
+      return true;
+    }
+    PhysDef = Reg;
+  }
+
+  if (PhysDef) {
+    MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+    if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
+      return true;
+  }
+  return false;
+}
+
+bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
+                                   unsigned PhysDef) const {
+  // For now conservatively returns false if the common subexpression is
+  // not in the same basic block as the given instruction.
+  MachineBasicBlock *MBB = MI->getParent();
+  if (CSMI->getParent() != MBB)
+    return false;
+  MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+  MachineBasicBlock::const_iterator E = MI;
+  unsigned LookAheadLeft = LookAheadLimit;
+  while (LookAheadLeft) {
+    // Skip over dbg_value's.
+    while (I != E && I->isDebugValue())
+      ++I;
+
+    if (I == E)
+      return true;
+    if (I->modifiesRegister(PhysDef, TRI))
+      return false;
+
+    --LookAheadLeft;
+    ++I;
+  }
+
+  return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+  if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+    return false;
+
+  // Ignore copies.
+  if (MI->isCopyLike())
+    return false;
+
+  // Ignore stuff that we obviously can't move.
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+      TID.hasUnmodeledSideEffects())
+    return false;
+
+  if (TID.mayLoad()) {
+    // Okay, this instruction does a load. As a refinement, we allow the target
+    // to decide whether the loaded value is actually a constant. If so, we can
+    // actually use it as a load.
+    if (!MI->isInvariantLoad(AA))
+      // FIXME: we should be able to hoist loads with no other side effects if
+      // there are no other instructions which can change memory in this loop.
+      // This is a trivial form of alias analysis.
+      return false;
+  }
+  return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+                                   MachineInstr *CSMI, MachineInstr *MI) {
+  // FIXME: Heuristics that work around the lack of live range splitting.
+
+  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or
+  // in an immediate predecessor. We don't want to increase register pressure
+  // and end up causing other computations to be spilled.
+  if (MI->getDesc().isAsCheapAsAMove()) {
+    MachineBasicBlock *CSBB = CSMI->getParent();
+    MachineBasicBlock *BB = MI->getParent();
+    if (CSBB != BB &&
+        find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
+      return false;
+  }
+
+  // Heuristics #2: If the expression doesn't use a virtual register and the
+  // only uses of the redundant computation are copies, do not CSE.
+  bool HasVRegUse = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse() && MO.getReg() &&
+        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+      HasVRegUse = true;
+      break;
+    }
+  }
+  if (!HasVRegUse) {
+    bool HasNonCopyUse = false;
+    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+      MachineInstr *Use = &*I;
+      // Ignore copies.
+      if (!Use->isCopyLike()) {
+        HasNonCopyUse = true;
+        break;
+      }
+    }
+    if (!HasNonCopyUse)
+      return false;
+  }
+
+  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+  // it unless the defined value is already used in the BB of the new use.
+  bool HasPHI = false;
+  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+       E = MRI->use_nodbg_end(); I != E; ++I) {
+    MachineInstr *Use = &*I;
+    HasPHI |= Use->isPHI();
+    CSBBs.insert(Use->getParent());
+  }
+
+  if (!HasPHI)
+    return true;
+  return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+  ScopeType *Scope = new ScopeType(VNT);
+  ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+  assert(SI != ScopeMap.end());
+  // Delete the scope before erasing the map entry; erase invalidates SI.
+  delete SI->second;
+  ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+  bool Changed = false;
+
+  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    if (!isCSECandidate(MI))
+      continue;
+
+    bool DefPhys = false;
+    bool FoundCSE = VNT.count(MI);
+    if (!FoundCSE) {
+      // Look for trivial copy coalescing opportunities.
+      if (PerformTrivialCoalescing(MI, MBB)) {
+        // After coalescing MI itself may become a copy.
+        if (MI->isCopyLike())
+          continue;
+        FoundCSE = VNT.count(MI);
+      }
+    }
+    // FIXME: commute commutable instructions?
+
+    // If the instruction defines a physical register and the value *may* be
+    // used, then it's not safe to replace it with a common subexpression.
+    unsigned PhysDef = 0;
+    if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
+      FoundCSE = false;
+
+      // ... Unless the CS is local and the physical register it defines is
+      // not clobbered in between.
+      if (PhysDef) {
+        unsigned CSVN = VNT.lookup(MI);
+        MachineInstr *CSMI = Exps[CSVN];
+        if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
+          FoundCSE = true;
+          DefPhys = true;
+        }
+      }
+    }
+
+    if (!FoundCSE) {
+      VNT.insert(MI, CurrVN++);
+      Exps.push_back(MI);
+      continue;
+    }
+
+    // Found a common subexpression, eliminate it.
+    unsigned CSVN = VNT.lookup(MI);
+    MachineInstr *CSMI = Exps[CSVN];
+    DEBUG(dbgs() << "Examining: " << *MI);
+    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+    // Check if it's profitable to perform this CSE.
+    bool DoCSE = true;
+    unsigned NumDefs = MI->getDesc().getNumDefs();
+    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned OldReg = MO.getReg();
+      unsigned NewReg = CSMI->getOperand(i).getReg();
+      if (OldReg == NewReg)
+        continue;
+      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+             TargetRegisterInfo::isVirtualRegister(NewReg) &&
+             "Do not CSE physical register defs!");
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+        DoCSE = false;
+        break;
+      }
+      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+      --NumDefs;
+    }
+
+    // Actually perform the elimination.
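+    // Replacing each def with the equivalent CS register may invalidate kill
+    // flags recorded on that register, so they are conservatively cleared.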
+    if (DoCSE) {
+      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
+        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+        MRI->clearKillFlags(CSEPairs[i].second);
+      }
+      MI->eraseFromParent();
+      ++NumCSEs;
+      if (DefPhys)
+        ++NumPhysCSEs;
+    } else {
+      DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+      VNT.insert(MI, CurrVN++);
+      Exps.push_back(MI);
+    }
+    CSEPairs.clear();
+  }
+
+  return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+  if (OpenChildren[Node])
+    return;
+
+  // Pop scope.
+  ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose children are all done.
+  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+    unsigned Left = --OpenChildren[Parent];
+    if (Left != 0)
+      break;
+    ExitScope(Parent->getBlock());
+    Node = Parent;
+  }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+  SmallVector<MachineDomTreeNode*, 32> Scopes;
+  SmallVector<MachineDomTreeNode*, 8> WorkList;
+  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+  // Perform a DFS walk to determine the order of visit.
+  WorkList.push_back(Node);
+  do {
+    Node = WorkList.pop_back_val();
+    Scopes.push_back(Node);
+    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+    unsigned NumChildren = Children.size();
+    OpenChildren[Node] = NumChildren;
+    for (unsigned i = 0; i != NumChildren; ++i) {
+      MachineDomTreeNode *Child = Children[i];
+      ParentMap[Child] = Node;
+      WorkList.push_back(Child);
+    }
+  } while (!WorkList.empty());
+
+  // Now perform CSE.
+  bool Changed = false;
+  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+    MachineDomTreeNode *Node = Scopes[i];
+    MachineBasicBlock *MBB = Node->getBlock();
+    EnterScope(MBB);
+    Changed |= ProcessBlock(MBB);
+    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+    ExitScopeIfDone(Node, OpenChildren, ParentMap);
+  }
+
+  return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  DT = &getAnalysis<MachineDominatorTree>();
+  return PerformCSE(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 0000000..3c67478
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,58 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace llvm { +TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>); +TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); +} + +char MachineDominatorTree::ID = 0; + +INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", + "MachineDominator Tree Construction", true, true); + +char &llvm::MachineDominatorsID = MachineDominatorTree::ID; + +void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + + return false; +} + +MachineDominatorTree::MachineDominatorTree() + : MachineFunctionPass(ID) { + DT = new DominatorTreeBase<MachineBasicBlock>(false); +} + +MachineDominatorTree::~MachineDominatorTree() { + delete DT; +} + +void MachineDominatorTree::releaseMemory() { + DT->releaseMemory(); +} + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp new file mode 100644 index 0000000..0171700 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -0,0 +1,739 @@ +//===-- MachineFunction.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect native machine code information for a function. This allows +// target-specific information about the generated code to be stored with each +// function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Config/config.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineFunction implementation +//===----------------------------------------------------------------------===// + +// Out of line virtual method. 
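+// Defining the destructor out of line here anchors MachineFunctionInfo's
+// vtable to this translation unit, so the compiler does not have to emit a
+// copy of it in every file that includes the header.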
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+  MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+                                 unsigned FunctionNum, MachineModuleInfo &mmi)
+  : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
+  if (TM.getRegisterInfo())
+    RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
+  else
+    RegInfo = 0;
+  MFInfo = 0;
+  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
+  if (Fn->hasFnAttr(Attribute::StackAlignment))
+    FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+        Fn->getAttributes().getFnAttributes()));
+  ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+  Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
+  FunctionNumber = FunctionNum;
+  JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+  BasicBlocks.clear();
+  InstructionRecycler.clear(Allocator);
+  BasicBlockRecycler.clear(Allocator);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
+  if (MFInfo) {
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
+  }
+  FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+  ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+
+  if (JumpTableInfo) {
+    JumpTableInfo->~MachineJumpTableInfo();
+    Allocator.Deallocate(JumpTableInfo);
+  }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function. If it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+  if (JumpTableInfo) return JumpTableInfo;
+
+  JumpTableInfo = new (Allocator)
+    MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+  return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+  if (empty()) { MBBNumbering.clear(); return; }
+  MachineFunction::iterator MBBI, E = end();
+  if (MBB == 0)
+    MBBI = begin();
+  else
+    MBBI = MBB;
+
+  // Figure out the block number this should have.
+  unsigned BlockNo = 0;
+  if (MBBI != begin())
+    BlockNo = prior(MBBI)->getNumber()+1;
+
+  for (; MBBI != E; ++MBBI, ++BlockNo) {
+    if (MBBI->getNumber() != (int)BlockNo) {
+      // Remove use of the old number.
+      if (MBBI->getNumber() != -1) {
+        assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+               "MBB number mismatch!");
+        MBBNumbering[MBBI->getNumber()] = 0;
+      }
+
+      // If BlockNo is already taken, set that block's number to -1.
+      if (MBBNumbering[BlockNo])
+        MBBNumbering[BlockNo]->setNumber(-1);
+
+      MBBNumbering[BlockNo] = MBBI;
+      MBBI->setNumber(BlockNo);
+    }
+  }
+
+  // Okay, all the blocks are renumbered. If we have compactified the block
+  // numbering, shrink MBBNumbering now.
+  assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+  MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
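+/// For example, a client holding a MachineFunction &MF and a
+/// TargetInstrInfo *TII would typically allocate an instruction as
+///   MachineInstr *MI = MF.CreateMachineInstr(TII->get(Opcode), DL);
+/// where Opcode and DL stand for the desired opcode and debug location.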
+/// +MachineInstr * +MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID, + DebugLoc DL, bool NoImp) { + return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) + MachineInstr(TID, DL, NoImp); +} + +/// CloneMachineInstr - Create a new MachineInstr which is a copy of the +/// 'Orig' instruction, identical in all ways except the instruction +/// has no parent, prev, or next. +/// +MachineInstr * +MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { + return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) + MachineInstr(*this, *Orig); +} + +/// DeleteMachineInstr - Delete the given MachineInstr. +/// +void +MachineFunction::DeleteMachineInstr(MachineInstr *MI) { + MI->~MachineInstr(); + InstructionRecycler.Deallocate(Allocator, MI); +} + +/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this +/// instead of `new MachineBasicBlock'. +/// +MachineBasicBlock * +MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { + return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator)) + MachineBasicBlock(*this, bb); +} + +/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock. +/// +void +MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { + assert(MBB->getParent() == this && "MBB parent mismatch!"); + MBB->~MachineBasicBlock(); + BasicBlockRecycler.Deallocate(Allocator, MBB); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const Value *v, unsigned f, + int64_t o, uint64_t s, + unsigned base_alignment) { + return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size) { + return new (Allocator) + MachineMemOperand(MMO->getValue(), MMO->getFlags(), + int64_t(uint64_t(MMO->getOffset()) + + uint64_t(Offset)), + Size, MMO->getBaseAlignment()); +} + +MachineInstr::mmo_iterator +MachineFunction::allocateMemRefsArray(unsigned long Num) { + return Allocator.Allocate<MachineMemOperand *>(Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isLoad()) + ++Num; + + // Allocate a new array and populate it with the load information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isLoad()) { + if (!(*I)->isStore()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the store flag. + MachineMemOperand *JustLoad = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOStore, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustLoad; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isStore()) + ++Num; + + // Allocate a new array and populate it with the store information. 
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isStore()) { + if (!(*I)->isLoad()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the load flag. + MachineMemOperand *JustStore = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOLoad, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustStore; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +void MachineFunction::dump() const { + print(dbgs()); +} + +void MachineFunction::print(raw_ostream &OS) const { + OS << "# Machine code for function " << Fn->getName() << ":\n"; + + // Print Frame Information + FrameInfo->print(*this, OS); + + // Print JumpTable Information + if (JumpTableInfo) + JumpTableInfo->print(OS); + + // Print Constant Pool + ConstantPool->print(OS); + + const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); + + if (RegInfo && !RegInfo->livein_empty()) { + OS << "Function Live Ins: "; + for (MachineRegisterInfo::livein_iterator + I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) { + if (TRI) + OS << "%" << TRI->getName(I->first); + else + OS << " %physreg" << I->first; + + if (I->second) + OS << " in reg%" << I->second; + + if (llvm::next(I) != E) + OS << ", "; + } + OS << '\n'; + } + if (RegInfo && !RegInfo->liveout_empty()) { + OS << "Function Live Outs: "; + for (MachineRegisterInfo::liveout_iterator + I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I){ + if (TRI) + OS << '%' << TRI->getName(*I); + else + OS << "%physreg" << *I; + + if (llvm::next(I) != E) + OS << " "; + } + OS << '\n'; + } + + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { + OS << '\n'; + BB->print(OS); + } + + OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; +} + +namespace llvm { + template<> + struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineFunction *F) { + return "CFG for '" + F->getFunction()->getNameStr() + "' function"; + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + if (isSimple () && Node->getBasicBlock() && + !Node->getBasicBlock()->getName().empty()) + return Node->getBasicBlock()->getNameStr() + ":"; + + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (isSimple()) + OSS << Node->getNumber() << ':'; + else + Node->print(OSS); + } + + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... 
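+      // In the DOT language "\l" is a line break that left-justifies the
+      // preceding text, so each '\n' in the printed block is rewritten below
+      // into a literal backslash followed by 'l'.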
+ for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + return OutStr; + } + }; +} + +void MachineFunction::viewCFG() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getFunction()->getNameStr()); +#else + errs() << "MachineFunction::viewCFG is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +void MachineFunction::viewCFGOnly() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getFunction()->getNameStr(), true); +#else + errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// addLiveIn - Add the specified physical register as a live-in value and +/// create a corresponding virtual register for it. +unsigned MachineFunction::addLiveIn(unsigned PReg, + const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = getRegInfo(); + unsigned VReg = MRI.getLiveInVirtReg(PReg); + if (VReg) { + assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + return VReg; + } + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(PReg, VReg); + return VReg; +} + +/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. +/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a +/// normal 'L' label is returned. +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, + bool isLinkerPrivate) const { + assert(JumpTableInfo && "No jump tables"); + + assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); + const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); + + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : + MAI.getPrivateGlobalPrefix(); + SmallString<60> Name; + raw_svector_ostream(Name) + << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; + return Ctx.GetOrCreateSymbol(Name.str()); +} + + +//===----------------------------------------------------------------------===// +// MachineFrameInfo implementation +//===----------------------------------------------------------------------===// + +/// CreateFixedObject - Create a new object at a fixed location on the stack. +/// All fixed objects should be created before other objects are created for +/// efficiency. By default, fixed objects are immutable. This returns an +/// index with a negative value. +/// +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = TFI.getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/false, false)); + return -++NumFixedObjects; +} + + +BitVector +MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { + assert(MBB && "MBB must be valid"); + const MachineFunction *MF = MBB->getParent(); + assert(MF && "MBB must be part of a MachineFunction"); + const TargetMachine &TM = MF->getTarget(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. 
They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + BV.set(*CSR); + + // The entry MBB always has all CSRs pristine. + if (MBB == &MF->front()) + return BV; + + // On other MBBs the saved CSRs are not pristine. + const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo(); + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) + BV.reset(I->getReg()); + + return BV; +} + + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ + if (Objects.empty()) return; + + const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); + int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); + + OS << "Frame Objects:\n"; + + for (unsigned i = 0, e = Objects.size(); i != e; ++i) { + const StackObject &SO = Objects[i]; + OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + if (SO.Size == ~0ULL) { + OS << "dead\n"; + continue; + } + if (SO.Size == 0) + OS << "variable sized"; + else + OS << "size=" << SO.Size; + OS << ", align=" << SO.Alignment; + + if (i < NumFixedObjects) + OS << ", fixed"; + if (i < NumFixedObjects || SO.SPOffset != -1) { + int64_t Off = SO.SPOffset - ValOffset; + OS << ", at location [SP"; + if (Off > 0) + OS << "+" << Off; + else if (Off < 0) + OS << Off; + OS << "]"; + } + OS << "\n"; + } +} + +void MachineFrameInfo::dump(const MachineFunction &MF) const { + print(MF, dbgs()); +} + +//===----------------------------------------------------------------------===// +// MachineJumpTableInfo implementation +//===----------------------------------------------------------------------===// + +/// getEntrySize - Return the size of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { + // The size of a jump table entry is 4 bytes unless the entry is just the + // address of a block, in which case it is the pointer size. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return 4; + case MachineJumpTableInfo::EK_Inline: + return 0; + } + assert(0 && "Unknown jump table encoding!"); + return ~0; +} + +/// getEntryAlignment - Return the alignment of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { + // The alignment of a jump table entry is the alignment of int32 unless the + // entry is just the address of a block, in which case it is the pointer + // alignment. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return TD.getABIIntegerTypeAlignment(32); + case MachineJumpTableInfo::EK_Inline: + return 1; + } + assert(0 && "Unknown jump table encoding!"); + return ~0; +} + +/// createJumpTableIndex - Create a new jump table entry in the jump table info. 
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+                               const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+                                                  MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+    if (ReplaceMBBInJumpTable(i, Old, New))
+      MadeChange = true;
+  return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+                                                 MachineBasicBlock *Old,
+                                                 MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  MachineJumpTableEntry &JTE = JumpTables[Idx];
+  for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+    if (JTE.MBBs[j] == Old) {
+      JTE.MBBs[j] = New;
+      MadeChange = true;
+    }
+  return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+  if (JumpTables.empty()) return;
+
+  OS << "Jump Tables:\n";
+
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  jt#" << i << ": ";
+    for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+      OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+  }
+
+  OS << '\n';
+}
+
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+
+
+//===----------------------------------------------------------------------===//
+//  MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getRelocationInfo();
+  return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+                                      const TargetData *TD) {
+  // Handle the trivial case quickly.
+  if (A == B) return true;
+
+  // If they have the same type but weren't the same constant, quickly
+  // reject them.
+  if (A->getType() == B->getType()) return false;
+
+  // For now, only support constants with the same size.
+  if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+    return false;
+
+  // If a floating-point value and an integer value have the same encoding,
+  // they can share a constant-pool entry.
+  if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+    if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
+      return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
+  if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+    if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
+      return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
+
+  // Two vectors can share an entry if each pair of corresponding
+  // elements could.
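+  // Combined with the FP/int case above, this lets, e.g., a vector of floats
+  // share an entry with a vector of integers whose elements are bit-identical.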
+ if (const ConstantVector *AV = dyn_cast<ConstantVector>(A)) + if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) { + if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) + return false; + for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) + if (!CanShareConstantPoolEntry(AV->getOperand(i), + BV->getOperand(i), TD)) + return false; + return true; + } + + // TODO: Handle other cases. + + return false; +} + +/// getConstantPoolIndex - Create a new entry in the constant pool or return +/// an existing one. User must specify the log2 of the minimum required +/// alignment for the object. +/// +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + for (unsigned i = 0, e = Constants.size(); i != e; ++i) + if (!Constants[i].isMachineConstantPoolEntry() && + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + if ((unsigned)Constants[i].getAlignment() < Alignment) + Constants[i].Alignment = Alignment; + return i; + } + + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); + return Constants.size()-1; +} + +unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + int Idx = V->getExistingMachineCPValue(this, Alignment); + if (Idx != -1) + return (unsigned)Idx; + + Constants.push_back(MachineConstantPoolEntry(V, Alignment)); + return Constants.size()-1; +} + +void MachineConstantPool::print(raw_ostream &OS) const { + if (Constants.empty()) return; + + OS << "Constant Pool:\n"; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + OS << " cp#" << i << ": "; + if (Constants[i].isMachineConstantPoolEntry()) + Constants[i].Val.MachineCPVal->print(OS); + else + OS << *(Value*)Constants[i].Val.ConstVal; + OS << ", align=" << Constants[i].getAlignment(); + OS << "\n"; + } +} + +void MachineConstantPool::dump() const { print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 0000000..4f84b95 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,62 @@ +//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +using namespace llvm; + +// Register this pass with PassInfo directly to avoid having to define +// a default constructor. 
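+// (The INITIALIZE_PASS-style registration would default-construct the pass,
+// which is not possible here: the constructor below requires a TargetMachine
+// and an optimization level.)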
+static PassInfo +X("Machine Function Analysis", "machine-function-analysis", + &MachineFunctionAnalysis::ID, 0, + /*CFGOnly=*/false, /*is_analysis=*/true); + +char MachineFunctionAnalysis::ID = 0; + +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, + CodeGenOpt::Level OL) : + FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { +} + +MachineFunctionAnalysis::~MachineFunctionAnalysis() { + releaseMemory(); + assert(!MF && "MachineFunctionAnalysis left initialized!"); +} + +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); +} + +bool MachineFunctionAnalysis::doInitialization(Module &M) { + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI && "MMI not around yet??"); + MMI->setModule(&M); + NextFnNum = 0; + return false; +} + + +bool MachineFunctionAnalysis::runOnFunction(Function &F) { + assert(!MF && "MachineFunctionAnalysis already initialized!"); + MF = new MachineFunction(&F, TM, NextFnNum++, + getAnalysis<MachineModuleInfo>()); + return false; +} + +void MachineFunctionAnalysis::releaseMemory() { + delete MF; + MF = 0; +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp new file mode 100644 index 0000000..e5a4912 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -0,0 +1,56 @@ +//===-- MachineFunctionPass.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionPass members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +using namespace llvm; + +Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return createMachineFunctionPrinterPass(O, Banner); +} + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + return runOnMachineFunction(MF); +} + +void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + + // MachineFunctionPass preserves all LLVM IR passes, but there's no + // high-level way to express this. Instead, just list a bunch of + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. 
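+  // The string overload of addPreserved names each pass by its command-line
+  // argument, avoiding a compile-time dependency on the headers of these
+  // analyses.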
+ AU.addPreserved<AliasAnalysis>(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("memdep"); + AU.addPreserved("live-values"); + AU.addPreserved("domtree"); + AU.addPreserved("domfrontier"); + AU.addPreserved("loops"); + AU.addPreserved("lda"); + + FunctionPass::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp new file mode 100644 index 0000000..2aaa798 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -0,0 +1,60 @@ +//===-- MachineFunctionPrinterPass.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineFunctionPrinterPass implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +/// MachineFunctionPrinterPass - This is a pass to dump the IR of a +/// MachineFunction. +/// +struct MachineFunctionPrinterPass : public MachineFunctionPass { + static char ID; + + raw_ostream &OS; + const std::string Banner; + + MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) + : MachineFunctionPass(ID), OS(os), Banner(banner) {} + + const char *getPassName() const { return "MachineFunction Printer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) { + OS << "# " << Banner << ":\n"; + MF.print(OS); + return false; + } +}; + +char MachineFunctionPrinterPass::ID = 0; +} + +namespace llvm { +/// Returns a newly-created MachineFunction Printer pass. The +/// default banner is empty. +/// +MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, + const std::string &Banner){ + return new MachineFunctionPrinterPass(OS, Banner); +} + +} diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp new file mode 100644 index 0000000..446e461 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -0,0 +1,1573 @@ +//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods common to all machine instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/Metadata.h" +#include "llvm/Type.h" +#include "llvm/Value.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/FoldingSet.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineOperand Implementation +//===----------------------------------------------------------------------===// + +/// AddRegOperandToRegInfo - Add this register operand to the specified +/// MachineRegisterInfo. If it is null, then the next/prev fields should be +/// explicitly nulled out. +void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { + assert(isReg() && "Can only add reg operand to use lists"); + + // If the reginfo pointer is null, just explicitly null out or next/prev + // pointers, to ensure they are not garbage. + if (RegInfo == 0) { + Contents.Reg.Prev = 0; + Contents.Reg.Next = 0; + return; + } + + // Otherwise, add this operand to the head of the registers use/def list. + MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); + + // For SSA values, we prefer to keep the definition at the start of the list. + // we do this by skipping over the definition if it is at the head of the + // list. + if (*Head && (*Head)->isDef()) + Head = &(*Head)->Contents.Reg.Next; + + Contents.Reg.Next = *Head; + if (Contents.Reg.Next) { + assert(getReg() == Contents.Reg.Next->getReg() && + "Different regs on the same list!"); + Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; + } + + Contents.Reg.Prev = Head; + *Head = this; +} + +/// RemoveRegOperandFromRegInfo - Remove this register operand from the +/// MachineRegisterInfo it is linked with. +void MachineOperand::RemoveRegOperandFromRegInfo() { + assert(isOnRegUseList() && "Reg operand is not on a use list"); + // Unlink this from the doubly linked list of operands. + MachineOperand *NextOp = Contents.Reg.Next; + *Contents.Reg.Prev = NextOp; + if (NextOp) { + assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); + NextOp->Contents.Reg.Prev = Contents.Reg.Prev; + } + Contents.Reg.Prev = 0; + Contents.Reg.Next = 0; +} + +void MachineOperand::setReg(unsigned Reg) { + if (getReg() == Reg) return; // No change. + + // Otherwise, we have to change the register. If this operand is embedded + // into a machine function, we need to update the old and new register's + // use/def lists. 
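+  // Only an operand that is fully embedded in a function is on the
+  // MachineRegisterInfo use/def chains, so walk up operand -> instruction ->
+  // block -> function before touching them.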
+ if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + RemoveRegOperandFromRegInfo(); + Contents.Reg.RegNo = Reg; + AddRegOperandToRegInfo(&MF->getRegInfo()); + return; + } + + // Otherwise, just change the register, no problem. :) + Contents.Reg.RegNo = Reg; +} + +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + assert(Reg && "Invalid SubReg for physical register"); + setSubReg(0); + } + setReg(Reg); +} + +/// ChangeToImmediate - Replace this operand with a new immediate operand of +/// the specified value. If an operand is known to be an immediate already, +/// the setImm method should be used. +void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + // If this operand is currently a register operand, and if this is in a + // function, deregister the operand from the register's use/def list. + if (isReg() && getParent() && getParent()->getParent() && + getParent()->getParent()->getParent()) + RemoveRegOperandFromRegInfo(); + + OpKind = MO_Immediate; + Contents.ImmVal = ImmVal; +} + +/// ChangeToRegister - Replace this operand with a new register operand of +/// the specified value. If an operand is known to be an register already, +/// the setReg method should be used. +void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, + bool isKill, bool isDead, bool isUndef, + bool isDebug) { + // If this operand is already a register operand, use setReg to update the + // register's use/def lists. + if (isReg()) { + assert(!isEarlyClobber()); + setReg(Reg); + } else { + // Otherwise, change this to a register and set the reg#. + OpKind = MO_Register; + Contents.Reg.RegNo = Reg; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + AddRegOperandToRegInfo(&MF->getRegInfo()); + } + + IsDef = isDef; + IsImp = isImp; + IsKill = isKill; + IsDead = isDead; + IsUndef = isUndef; + IsEarlyClobber = false; + IsDebug = isDebug; + SubReg = 0; +} + +/// isIdenticalTo - Return true if this operand is identical to the specified +/// operand. 
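+/// Register operands are compared by register number, def/use sense, and
+/// subregister index; the kill, dead, and implicit flags are not part of
+/// the identity.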
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { + if (getType() != Other.getType() || + getTargetFlags() != Other.getTargetFlags()) + return false; + + switch (getType()) { + default: llvm_unreachable("Unrecognized operand type"); + case MachineOperand::MO_Register: + return getReg() == Other.getReg() && isDef() == Other.isDef() && + getSubReg() == Other.getSubReg(); + case MachineOperand::MO_Immediate: + return getImm() == Other.getImm(); + case MachineOperand::MO_FPImmediate: + return getFPImm() == Other.getFPImm(); + case MachineOperand::MO_MachineBasicBlock: + return getMBB() == Other.getMBB(); + case MachineOperand::MO_FrameIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_ConstantPoolIndex: + return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); + case MachineOperand::MO_JumpTableIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_GlobalAddress: + return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); + case MachineOperand::MO_ExternalSymbol: + return !strcmp(getSymbolName(), Other.getSymbolName()) && + getOffset() == Other.getOffset(); + case MachineOperand::MO_BlockAddress: + return getBlockAddress() == Other.getBlockAddress(); + case MachineOperand::MO_MCSymbol: + return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); + } +} + +/// print - Print the specified machine operand. +/// +void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { + // If the instruction is embedded into a basic block, we can find the + // target info for the instruction. + if (!TM) + if (const MachineInstr *MI = getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + TM = &MF->getTarget(); + + switch (getType()) { + case MachineOperand::MO_Register: + if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) { + OS << "%reg" << getReg(); + } else { + if (TM) + OS << "%" << TM->getRegisterInfo()->get(getReg()).Name; + else + OS << "%physreg" << getReg(); + } + + if (getSubReg() != 0) { + if (TM) + OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg()); + else + OS << ':' << getSubReg(); + } + + if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || + isEarlyClobber()) { + OS << '<'; + bool NeedComma = false; + if (isDef()) { + if (NeedComma) OS << ','; + if (isEarlyClobber()) + OS << "earlyclobber,"; + if (isImplicit()) + OS << "imp-"; + OS << "def"; + NeedComma = true; + } else if (isImplicit()) { + OS << "imp-use"; + NeedComma = true; + } + + if (isKill() || isDead() || isUndef()) { + if (NeedComma) OS << ','; + if (isKill()) OS << "kill"; + if (isDead()) OS << "dead"; + if (isUndef()) { + if (isKill() || isDead()) + OS << ','; + OS << "undef"; + } + } + OS << '>'; + } + break; + case MachineOperand::MO_Immediate: + OS << getImm(); + break; + case MachineOperand::MO_FPImmediate: + if (getFPImm()->getType()->isFloatTy()) + OS << getFPImm()->getValueAPF().convertToFloat(); + else + OS << getFPImm()->getValueAPF().convertToDouble(); + break; + case MachineOperand::MO_MachineBasicBlock: + OS << "<BB#" << getMBB()->getNumber() << ">"; + break; + case MachineOperand::MO_FrameIndex: + OS << "<fi#" << getIndex() << '>'; + break; + case MachineOperand::MO_ConstantPoolIndex: + OS << "<cp#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_JumpTableIndex: 
+ OS << "<jt#" << getIndex() << '>'; + break; + case MachineOperand::MO_GlobalAddress: + OS << "<ga:"; + WriteAsOperand(OS, getGlobal(), /*PrintType=*/false); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_ExternalSymbol: + OS << "<es:" << getSymbolName(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_BlockAddress: + OS << '<'; + WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); + OS << '>'; + break; + case MachineOperand::MO_Metadata: + OS << '<'; + WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); + OS << '>'; + break; + case MachineOperand::MO_MCSymbol: + OS << "<MCSym=" << *getMCSymbol() << '>'; + break; + default: + llvm_unreachable("Unrecognized operand type"); + } + + if (unsigned TF = getTargetFlags()) + OS << "[TF=" << TF << ']'; +} + +//===----------------------------------------------------------------------===// +// MachineMemOperand Implementation +//===----------------------------------------------------------------------===// + +MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, + int64_t o, uint64_t s, unsigned int a) + : Offset(o), Size(s), V(v), + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); + assert((isLoad() || isStore()) && "Not a load/store!"); +} + +/// Profile - Gather unique data for the object. +/// +void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Offset); + ID.AddInteger(Size); + ID.AddPointer(V); + ID.AddInteger(Flags); +} + +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. + V = MMO->getValue(); + Offset = MMO->getOffset(); + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. +uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { + assert((MMO.isLoad() || MMO.isStore()) && + "SV has to be a load, store or both."); + + if (MMO.isVolatile()) + OS << "Volatile "; + + if (MMO.isLoad()) + OS << "LD"; + if (MMO.isStore()) + OS << "ST"; + OS << MMO.getSize(); + + // Print the address information. + OS << "["; + if (!MMO.getValue()) + OS << "<unknown>"; + else + WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (MMO.getBaseAlignment() != MMO.getAlignment()) + OS << "(align=" << MMO.getBaseAlignment() << ")"; + + if (MMO.getOffset() != 0) + OS << "+" << MMO.getOffset(); + OS << "]"; + + // Print the alignment of the reference. 
+ if (MMO.getBaseAlignment() != MMO.getAlignment() || + MMO.getBaseAlignment() != MMO.getSize()) + OS << "(align=" << MMO.getAlignment() << ")"; + + return OS; +} + +//===----------------------------------------------------------------------===// +// MachineInstr Implementation +//===----------------------------------------------------------------------===// + +/// MachineInstr ctor - This constructor creates a dummy MachineInstr with +/// TID NULL and no operands. +MachineInstr::MachineInstr() + : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), + Parent(0) { + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); +} + +void MachineInstr::addImplicitDefUseOperands() { + if (TID->ImplicitDefs) + for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs) + addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); + if (TID->ImplicitUses) + for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses) + addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); +} + +/// MachineInstr ctor - This constructor creates a MachineInstr and adds the +/// implicit operands. It reserves space for the number of operands specified by +/// the TargetInstrDesc. +MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0) { + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + if (!NoImp) + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); +} + +/// MachineInstr ctor - As above, but with a DebugLoc. +MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, + bool NoImp) + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + if (!NoImp) + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); +} + +/// MachineInstr ctor - Work exactly the same as the ctor two above, except +/// that the MachineInstr is created and added to the end of the specified +/// basic block. +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0) { + assert(MBB && "Cannot use inserting ctor with null basic block!"); + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); + MBB->push_back(this); // Add instruction to end of basic block! +} + +/// MachineInstr ctor - As above, but with a DebugLoc. 
+/// +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, + const TargetInstrDesc &tid) + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { + assert(MBB && "Cannot use inserting ctor with null basic block!"); + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); + MBB->push_back(this); // Add instruction to end of basic block! +} + +/// MachineInstr ctor - Copies MachineInstr arg exactly +/// +MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) + : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + Parent(0), debugLoc(MI.getDebugLoc()) { + Operands.reserve(MI.getNumOperands()); + + // Add operands + for (unsigned i = 0; i != MI.getNumOperands(); ++i) + addOperand(MI.getOperand(i)); + NumImplicitOps = MI.NumImplicitOps; + + // Set parent to null. + Parent = 0; + + LeakDetector::addGarbageObject(this); +} + +MachineInstr::~MachineInstr() { + LeakDetector::removeGarbageObject(this); +#ifndef NDEBUG + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); + assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) && + "Reg operand def/use list corrupted"); + } +#endif +} + +/// getRegInfo - If this instruction is embedded into a MachineFunction, +/// return the MachineRegisterInfo object for the current function, otherwise +/// return null. +MachineRegisterInfo *MachineInstr::getRegInfo() { + if (MachineBasicBlock *MBB = getParent()) + return &MBB->getParent()->getRegInfo(); + return 0; +} + +/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands already be on their use lists. +void MachineInstr::RemoveRegOperandsFromUseLists() { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + } +} + +/// AddRegOperandsToUseLists - Add all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands not be on their use lists yet. +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(&RegInfo); + } +} + + +/// addOperand - Add the specified operand to the instruction. If it is an +/// implicit operand, it is added to the end of the operand list. If it is +/// an explicit operand it is added at the end of the explicit operand list +/// (before the first implicit operand). +void MachineInstr::addOperand(const MachineOperand &Op) { + bool isImpReg = Op.isReg() && Op.isImplicit(); + assert((isImpReg || !OperandsComplete()) && + "Trying to add an operand to a machine instr that is already done!"); + + MachineRegisterInfo *RegInfo = getRegInfo(); + + // If we are adding the operand to the end of the list, our job is simpler. + // This is true most of the time, so this is a reasonable optimization. + if (isImpReg || NumImplicitOps == 0) { + // We can only do this optimization if we know that the operand list won't + // reallocate. 
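+    // Reallocation would move every MachineOperand in memory, breaking the
+    // embedded next/prev pointers that link operands onto the register
+    // use/def chains.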
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) { + Operands.push_back(Op); + + // Set the parent of the operand. + Operands.back().ParentMI = this; + + // If the operand is a register, update the operand's use list. + if (Op.isReg()) { + Operands.back().AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such + unsigned OpNo = Operands.size() - 1; + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + return; + } + } + + // Otherwise, we have to insert a real operand before any implicit ones. + unsigned OpNo = Operands.size()-NumImplicitOps; + + // If this instruction isn't embedded into a function, then we don't need to + // update any operand lists. + if (RegInfo == 0) { + // Simple insertion, no reginfo update needed for other register operands. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Do explicitly set the reginfo for this operand though, to ensure the + // next/prev fields are properly nulled out. + if (Operands[OpNo].isReg()) { + Operands[OpNo].AddRegOperandToRegInfo(0); + // If the register operand is flagged as early, mark the operand as such + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + + } else if (Operands.size()+1 <= Operands.capacity()) { + // Otherwise, we have to remove register operands from their register use + // list, add the operand, then add the register operands back to their use + // list. This also must handle the case when the operand list reallocates + // to somewhere else. + + // If insertion of this operand won't cause reallocation of the operand + // list, just remove the implicit operands, add the operand, then re-add all + // the rest of the operands. + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].RemoveRegOperandFromRegInfo(); + } + + // Add the operand. If it is a register, add it to the reg list. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + if (Operands[OpNo].isReg()) { + Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + + // Re-add all the implicit ops. + for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } else { + // Otherwise, we will be reallocating the operand list. Remove all reg + // operands from their list, then readd them after the operand list is + // reallocated. + RemoveRegOperandsFromUseLists(); + + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Re-add all the operands. + AddRegOperandsToUseLists(*RegInfo); + + // If the register operand is flagged as early, mark the operand as such + if (Operands[OpNo].isReg() + && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } +} + +/// RemoveOperand - Erase an operand from an instruction, leaving it with one +/// fewer operand than it started with. +/// +void MachineInstr::RemoveOperand(unsigned OpNo) { + assert(OpNo < Operands.size() && "Invalid operand number"); + + // Special case removing the last one. 
+  if (OpNo == Operands.size()-1) {
+    // If needed, remove from the reg def/use list.
+    if (Operands.back().isReg() && Operands.back().isOnRegUseList())
+      Operands.back().RemoveRegOperandFromRegInfo();
+
+    Operands.pop_back();
+    return;
+  }
+
+  // Otherwise, we are removing an interior operand.  If we have reginfo to
+  // update, remove all operands that will be shifted down from their reg lists,
+  // move everything down, then re-add them.
+  MachineRegisterInfo *RegInfo = getRegInfo();
+  if (RegInfo) {
+    for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+      if (Operands[i].isReg())
+        Operands[i].RemoveRegOperandFromRegInfo();
+    }
+  }
+
+  Operands.erase(Operands.begin()+OpNo);
+
+  if (RegInfo) {
+    for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+      if (Operands[i].isReg())
+        Operands[i].AddRegOperandToRegInfo(RegInfo);
+    }
+  }
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+                                 MachineMemOperand *MO) {
+  mmo_iterator OldMemRefs = MemRefs;
+  mmo_iterator OldMemRefsEnd = MemRefsEnd;
+
+  size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+  mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+  mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
+
+  std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+  NewMemRefs[NewNum - 1] = MO;
+
+  MemRefs = NewMemRefs;
+  MemRefsEnd = NewMemRefsEnd;
+}
+
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+                                 MICheckType Check) const {
+  // If opcodes or number of operands are not the same then the two
+  // instructions are obviously not identical.
+  if (Other->getOpcode() != getOpcode() ||
+      Other->getNumOperands() != getNumOperands())
+    return false;
+
+  // Check operands to make sure they match.
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    const MachineOperand &OMO = Other->getOperand(i);
+    // Clients may or may not want to ignore defs when testing for equality.
+    // For example, machine CSE pass only cares about finding common
+    // subexpressions, so it's safe to ignore virtual register defs.
+    if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+      if (Check == IgnoreDefs)
+        continue;
+      // Check == IgnoreVRegDefs
+      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+          TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+        if (MO.getReg() != OMO.getReg())
+          return false;
+    } else if (!MO.isIdenticalTo(OMO))
+      return false;
+  }
+  return true;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+  assert(getParent() && "Not embedded in a basic block!");
+  getParent()->remove(this);
+  return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing basic
+/// block, and deletes it.
+void MachineInstr::eraseFromParent() {
+  assert(getParent() && "Not embedded in a basic block!");
+  getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand.
+///
+bool MachineInstr::OperandsComplete() const {
+  unsigned short NumOperands = TID->getNumOperands();
+  if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+    return true;  // Broken: we have all the operands of this instruction!
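+  // Variadic instructions (e.g. inline asm) never report their operand list
+  // as complete, so additional operands may always be appended.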
+  return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+  unsigned NumOperands = TID->getNumOperands();
+  if (!TID->isVariadic())
+    return NumOperands;
+
+  for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isImplicit())
+      NumOperands++;
+  }
+  return NumOperands;
+}
+
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found. It further
+/// tightens the search criteria to a use that kills the register if isKill
+/// is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+                                          const TargetRegisterInfo *TRI) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MOReg == Reg ||
+        (TRI &&
+         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         TRI->isSubRegister(MOReg, Reg)))
+      if (!isKill || MO.isKill())
+        return i;
+  }
+  return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+                                         SmallVectorImpl<unsigned> *Ops) const {
+  bool PartDef = false; // Partial redefine.
+  bool FullDef = false; // Full define.
+  bool Use = false;
+
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || MO.getReg() != Reg)
+      continue;
+    if (Ops)
+      Ops->push_back(i);
+    if (MO.isUse())
+      Use |= !MO.isUndef();
+    else if (MO.getSubReg())
+      PartDef = true;
+    else
+      FullDef = true;
+  }
+  // A partial redefine uses Reg unless there is also a full define.
+  return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+                                        const TargetRegisterInfo *TRI) const {
+  bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned MOReg = MO.getReg();
+    bool Found = (MOReg == Reg);
+    if (!Found && TRI && isPhys &&
+        TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+      if (Overlap)
+        Found = TRI->regsOverlap(MOReg, Reg);
+      else
+        Found = TRI->isSubRegister(MOReg, Reg);
+    }
+    if (Found && (!isDead || MO.isDead()))
+      return i;
+  }
+  return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
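+/// (On a predicated target such as ARM, the predicate is typically a
+/// condition-code immediate operand followed by a predicate register use.)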
+int MachineInstr::findFirstPredOperandIdx() const {
+  const TargetInstrDesc &TID = getDesc();
+  if (TID.isPredicable()) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (TID.OpInfo[i].isPredicate())
+        return i;
+  }
+
+  return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+  if (isInlineAsm()) {
+    assert(DefOpIdx >= 3);
+    const MachineOperand &MO = getOperand(DefOpIdx);
+    if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+      return false;
+    // Determine the actual operand index that corresponds to this index.
+    unsigned DefNo = 0;
+    unsigned DefPart = 0;
+    for (unsigned i = 2, e = getNumOperands(); i < e; ) {
+      const MachineOperand &FMO = getOperand(i);
+      // After the normal asm operands there may be additional imp-def regs.
+      if (!FMO.isImm())
+        return false;
+      // Skip over this def.
+      unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
+      unsigned PrevDef = i + 1;
+      i = PrevDef + NumOps;
+      if (i > DefOpIdx) {
+        DefPart = DefOpIdx - PrevDef;
+        break;
+      }
+      ++DefNo;
+    }
+    for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
+      const MachineOperand &FMO = getOperand(i);
+      if (!FMO.isImm())
+        continue;
+      if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+        continue;
+      unsigned Idx;
+      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+          Idx == DefNo) {
+        if (UseOpIdx)
+          *UseOpIdx = (unsigned)i + 1 + DefPart;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+  const TargetInstrDesc &TID = getDesc();
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse() &&
+        TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+      if (UseOpIdx)
+        *UseOpIdx = (unsigned)i;
+      return true;
+    }
+  }
+  return false;
+}
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+  if (isInlineAsm()) {
+    const MachineOperand &MO = getOperand(UseOpIdx);
+    if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+      return false;
+
+    // Find the flag operand corresponding to UseOpIdx
+    unsigned FlagIdx, NumOps=0;
+    for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+      const MachineOperand &UFMO = getOperand(FlagIdx);
+      // After the normal asm operands there may be additional imp-def regs.
+      if (!UFMO.isImm())
+        return false;
+      NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm());
+      assert(NumOps < getNumOperands() && "Invalid inline asm flag");
+      if (UseOpIdx < FlagIdx+NumOps+1)
+        break;
+    }
+    if (FlagIdx >= UseOpIdx)
+      return false;
+    const MachineOperand &UFMO = getOperand(FlagIdx);
+    unsigned DefNo;
+    if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+      if (!DefOpIdx)
+        return true;
+
+      unsigned DefIdx = 2;
+      // Remember to adjust the index. First operand is asm string, second is
+      // the AlignStack bit, then there is a flag for each.
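+      // (Each flag immediate encodes how many register operands belong to its
+      // group, so the operand list can be walked one group at a time.)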
+      while (DefNo) {
+        const MachineOperand &FMO = getOperand(DefIdx);
+        assert(FMO.isImm());
+        // Skip over this def.
+        DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+        --DefNo;
+      }
+      *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
+      return true;
+    }
+    return false;
+  }
+
+  const TargetInstrDesc &TID = getDesc();
+  if (UseOpIdx >= TID.getNumOperands())
+    return false;
+  const MachineOperand &MO = getOperand(UseOpIdx);
+  if (!MO.isReg() || !MO.isUse())
+    return false;
+  int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+  if (DefIdx == -1)
+    return false;
+  if (DefOpIdx)
+    *DefOpIdx = (unsigned)DefIdx;
+  return true;
+}
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse())
+      MO.setIsKill(false);
+  }
+}
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+      continue;
+    for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+      MachineOperand &MOp = getOperand(j);
+      if (!MOp.isIdenticalTo(MO))
+        continue;
+      if (MO.isKill())
+        MOp.setIsKill();
+      else
+        MOp.setIsDead();
+      break;
+    }
+  }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isPredicable())
+    return;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    if (TID.OpInfo[i].isPredicate()) {
+      // Predicated operands must be last operands.
+      addOperand(MI->getOperand(i));
+    }
+  }
+}
+
+void MachineInstr::substituteRegister(unsigned FromReg,
+                                      unsigned ToReg,
+                                      unsigned SubIdx,
+                                      const TargetRegisterInfo &RegInfo) {
+  if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+    if (SubIdx)
+      ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = getOperand(i);
+      if (!MO.isReg() || MO.getReg() != FromReg)
+        continue;
+      MO.substPhysReg(ToReg, RegInfo);
+    }
+  } else {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = getOperand(i);
+      if (!MO.isReg() || MO.getReg() != FromReg)
+        continue;
+      MO.substVirtReg(ToReg, SubIdx, RegInfo);
+    }
+  }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+                                AliasAnalysis *AA,
+                                bool &SawStore) const {
+  // Ignore stuff that we obviously can't move.
+  if (TID->mayStore() || TID->isCall()) {
+    SawStore = true;
+    return false;
+  }
+  if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+    return false;
+
+  // See if this instruction does a load. If so, we have to guarantee that the
+  // loaded value doesn't change between the load and its intended destination.
+  // The check for isInvariantLoad gives the target a chance to classify the
+  // load as always returning a constant, e.g. a constant pool load.
+  if (TID->mayLoad() && !isInvariantLoad(AA))
+    // Otherwise, this is a real load. If there is a store between the load and
+    // end of block, or if the load is volatile, we can't move it.
+    return !SawStore && !hasVolatileMemoryRef();
+
+  return true;
+}
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+                                 AliasAnalysis *AA,
+                                 unsigned DstReg) const {
+  bool SawStore = false;
+  if (!TII->isTriviallyReMaterializable(this, AA) ||
+      !isSafeToMove(TII, AA, SawStore))
+    return false;
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg())
+      continue;
+    // FIXME: For now, do not remat any instruction with register operands.
+    // Later on, we can loosen the restriction if the register operands have
+    // not been modified between the def and use. Note, this is different from
+    // MachineSink because the code is no longer in two-address form (at least
+    // partially).
+    if (MO.isUse())
+      return false;
+    else if (!MO.isDead() && MO.getReg() != DstReg)
+      return false;
+  }
+  return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+  // An instruction known never to access memory won't have a volatile access.
+  if (!TID->mayStore() &&
+      !TID->mayLoad() &&
+      !TID->isCall() &&
+      !TID->hasUnmodeledSideEffects())
+    return false;
+
+  // Otherwise, if the instruction has no memory reference information,
+  // conservatively assume it wasn't preserved.
+  if (memoperands_empty())
+    return true;
+
+  // Check the memory reference information for volatile references.
+  for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+    if ((*I)->isVolatile())
+      return true;
+
+  return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+  // If the instruction doesn't load at all, it isn't an invariant load.
+  if (!TID->mayLoad())
+    return false;
+
+  // If the instruction has lost its memoperands, conservatively assume that
+  // it may not be an invariant load.
+  if (memoperands_empty())
+    return false;
+
+  const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+  for (mmo_iterator I = memoperands_begin(),
+       E = memoperands_end(); I != E; ++I) {
+    if ((*I)->isVolatile()) return false;
+    if ((*I)->isStore()) return false;
+
+    if (const Value *V = (*I)->getValue()) {
+      // A load from a constant PseudoSourceValue is invariant.
+      if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+        if (PSV->isConstant(MFI))
+          continue;
+      // If we have an AliasAnalysis, ask it whether the memory is constant.
+      if (AA && AA->pointsToConstantMemory(V))
+        continue;
+    }
+
+    // Otherwise assume conservatively.
+    return false;
+  }
+
+  // Everything checks out.
+  return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
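+/// For example, a hypothetical PHI such as
+///   %reg1025 = PHI %reg1024, <BB#1>, %reg1024, <BB#2>
+/// always produces the value of %reg1024, which is what gets returned.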
+unsigned MachineInstr::isConstantValuePHI() const { + if (!isPHI()) + return 0; + assert(getNumOperands() >= 3 && + "It's illegal to have a PHI without source operands"); + + unsigned Reg = getOperand(1).getReg(); + for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) + if (getOperand(i).getReg() != Reg) + return 0; + return Reg; +} + +/// allDefsAreDead - Return true if all the defs of this instruction are dead. +/// +bool MachineInstr::allDefsAreDead() const { + for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.isUse()) + continue; + if (!MO.isDead()) + return false; + } + return true; +} + +void MachineInstr::dump() const { + dbgs() << " " << *this; +} + +static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { + // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. + const MachineFunction *MF = 0; + const MachineRegisterInfo *MRI = 0; + if (const MachineBasicBlock *MBB = getParent()) { + MF = MBB->getParent(); + if (!TM && MF) + TM = &MF->getTarget(); + if (MF) + MRI = &MF->getRegInfo(); + } + + // Save a list of virtual registers. + SmallVector<unsigned, 8> VirtRegs; + + // Print explicitly defined operands on the left of an assignment syntax. + unsigned StartOp = 0, e = getNumOperands(); + for (; StartOp < e && getOperand(StartOp).isReg() && + getOperand(StartOp).isDef() && + !getOperand(StartOp).isImplicit(); + ++StartOp) { + if (StartOp != 0) OS << ", "; + getOperand(StartOp).print(OS, TM); + unsigned Reg = getOperand(StartOp).getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) + VirtRegs.push_back(Reg); + } + + if (StartOp != 0) + OS << " = "; + + // Print the opcode name. + OS << getDesc().getName(); + + // Print the rest of the operands. + bool OmittedAnyCallClobbers = false; + bool FirstOp = true; + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegs.push_back(MO.getReg()); + + // Omit call-clobbered registers which aren't used anywhere. This makes + // call instructions much less noisy on targets where calls clobber lots + // of registers. Don't rely on MO.isDead() because we may be called before + // LiveVariables is run, or we may be looking at a non-allocatable reg. 
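+    // A clobbered register is only omitted when it and all of its aliases are
+    // unused and not live out of the function.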
+ if (MF && getDesc().isCall() && + MO.isReg() && MO.isImplicit() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { + bool HasAliasLive = false; + for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + unsigned AliasReg = *Alias; ++Alias) + if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { + HasAliasLive = true; + break; + } + if (!HasAliasLive) { + OmittedAnyCallClobbers = true; + continue; + } + } + } + } + + if (FirstOp) FirstOp = false; else OS << ","; + OS << " "; + if (i < getDesc().NumOperands) { + const TargetOperandInfo &TOI = getDesc().OpInfo[i]; + if (TOI.isPredicate()) + OS << "pred:"; + if (TOI.isOptionalDef()) + OS << "opt:"; + } + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. + const MDNode *MD = MO.getMetadata(); + if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2))) + OS << "!\"" << MDS->getString() << '\"'; + else + MO.print(OS, TM); + } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + } else + MO.print(OS, TM); + } + + // Briefly indicate whether any call clobbers were omitted. + if (OmittedAnyCallClobbers) { + if (!FirstOp) OS << ","; + OS << " ..."; + } + + bool HaveSemi = false; + if (!memoperands_empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + + OS << " mem:"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + } + + // Print the regclass of any virtual registers encountered. + if (MRI && !VirtRegs.empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + for (unsigned i = 0; i != VirtRegs.size(); ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); + OS << " " << RC->getName() << ":%reg" << VirtRegs[i]; + for (unsigned j = i+1; j != VirtRegs.size();) { + if (MRI->getRegClass(VirtRegs[j]) != RC) { + ++j; + continue; + } + if (VirtRegs[i] != VirtRegs[j]) + OS << "," << VirtRegs[j]; + VirtRegs.erase(VirtRegs.begin()+j); + } + } + } + + if (!debugLoc.isUnknown() && MF) { + if (!HaveSemi) OS << ";"; + OS << " dbg:"; + printDebugLoc(debugLoc, MF, OS); + } + + OS << "\n"; +} + +bool MachineInstr::addRegisterKilled(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isKill()) + // The register is already marked kill. + return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; + MO.setIsKill(); + Found = true; + } + } else if (hasAliases && MO.isKill() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A super-register kill already exists. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded kill operands. 
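+  // (These are kill flags on sub-registers that the wider kill being added
+  // makes redundant; implicit operands are removed outright, explicit ones
+  // just lose the flag.)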
+ while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsKill(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is killed. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + false /*IsDef*/, + true /*IsImp*/, + true /*IsKill*/)); + return true; + } + return Found; +} + +bool MachineInstr::addRegisterDead(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isDead()) + // The register is already marked dead. + return true; + MO.setIsDead(); + Found = true; + } + } else if (hasAliases && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // There exists a super-register that's marked dead. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->getSubRegisters(IncomingReg) && + RegInfo->getSuperRegisters(Reg) && + RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded dead operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsDead(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is dead. Add a + // new implicit operand if required. + if (Found || !AddIfNotFound) + return Found; + + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + true /*IsDead*/)); + return true; +} + +void MachineInstr::addRegisterDefined(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { + MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (MO) + return; + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + MO.getSubReg() == 0) + return; + } + } + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/)); +} + +void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs, + const TargetRegisterInfo &TRI) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + bool Dead = true; + for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(), + E = UsedRegs.end(); I != E; ++I) + if (TRI.regsOverlap(*I, Reg)) { + Dead = false; + break; + } + // If there are no uses, including partial uses, the def is dead. 
+ if (Dead) MO.setIsDead(); + } +} + +unsigned +MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { + unsigned Hash = MI->getOpcode() * 37; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + uint64_t Key = (uint64_t)MO.getType() << 32; + switch (MO.getType()) { + default: break; + case MachineOperand::MO_Register: + if (MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. + Key |= MO.getReg(); + break; + case MachineOperand::MO_Immediate: + Key |= MO.getImm(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + Key |= MO.getIndex(); + break; + case MachineOperand::MO_MachineBasicBlock: + Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + break; + case MachineOperand::MO_GlobalAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + break; + case MachineOperand::MO_BlockAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + break; + case MachineOperand::MO_MCSymbol: + Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol()); + break; + } + Key += ~(Key << 32); + Key ^= (Key >> 22); + Key += ~(Key << 13); + Key ^= (Key >> 8); + Key += (Key << 3); + Key ^= (Key >> 15); + Key += ~(Key << 27); + Key ^= (Key >> 31); + Hash = (unsigned)Key + Hash * 37; + } + return Hash; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp new file mode 100644 index 0000000..1a74b74 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -0,0 +1,842 @@ +//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs loop invariant code motion on machine instructions. We +// attempt to remove as much code from the body of a loop as possible. +// +// This pass does not attempt to throttle itself to limit register pressure. +// The register allocation phases are expected to perform rematerialization +// to recover when register pressure is high. +// +// This pass is not intended to be a replacement or a complete alternative +// for the LLVM-IR-level LICM pass. It is only designed to hoist simple +// constructs that are not exposed before lowering and instruction selection. 
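+// (Typical candidates include trivially rematerializable instructions and
+// loads from constant memory such as constant pool entries or GOT stubs.)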
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
+STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+          "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+  class MachineLICM : public MachineFunctionPass {
+    bool PreRegAlloc;
+
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    const MachineFrameInfo *MFI;
+    MachineRegisterInfo *RegInfo;
+
+    // Various analyses that we use...
+    AliasAnalysis *AA;        // Alias analysis info.
+    MachineLoopInfo *MLI;     // Current MachineLoopInfo
+    MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+    // State that is updated as we process loops
+    bool Changed;             // True if a loop is changed.
+    bool FirstInLoop;         // True if it's the first LICM in the loop.
+    MachineLoop *CurLoop;     // The current loop we are working on.
+    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+    BitVector AllocatableSet;
+
+    // For each opcode, keep a list of potential CSE instructions.
+    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MachineLICM() :
+      MachineFunctionPass(ID), PreRegAlloc(true) {}
+
+    explicit MachineLICM(bool PreRA) :
+      MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "Machine Instruction LICM"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      CSEMap.clear();
+    }
+
+  private:
+    /// CandidateInfo - Keep track of information about hoisting candidates.
+    struct CandidateInfo {
+      MachineInstr *MI;
+      unsigned Def;
+      int FI;
+      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+        : MI(mi), Def(def), FI(fi) {}
+    };
+
+    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+    /// invariants out to the preheader.
+    void HoistRegionPostRA();
+
+    /// HoistPostRA - When an instruction is found to use only loop invariant
+    /// operands that are safe to hoist, this instruction is called to do the
+    /// dirty work.
+    void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+    /// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+    /// gather register def and frame object update information.
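+    /// (Candidates gathered here are later rejected if their defined register
+    /// is redefined elsewhere in the loop, or, for reloads, if their spill
+    /// slot is stored to anywhere in the loop.)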
+    void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+                   SmallSet<int, 32> &StoredFIs,
+                   SmallVector<CandidateInfo, 32> &Candidates);
+
+    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+    /// current loop.
+    void AddToLiveIns(unsigned Reg);
+
+    /// IsLICMCandidate - Returns true if the instruction may be a suitable
+    /// candidate for LICM. e.g. If the instruction is a call, then it's
+    /// obviously not safe to hoist it.
+    bool IsLICMCandidate(MachineInstr &I);
+
+    /// IsLoopInvariantInst - Returns true if the instruction is loop
+    /// invariant. I.e., all virtual register operands are defined outside of
+    /// the loop, physical registers aren't accessed (explicitly or implicitly),
+    /// and the instruction is hoistable.
+    ///
+    bool IsLoopInvariantInst(MachineInstr &I);
+
+    /// IsProfitableToHoist - Return true if it is potentially profitable to
+    /// hoist the given loop invariant.
+    bool IsProfitableToHoist(MachineInstr &MI);
+
+    /// HoistRegion - Walk the specified region of the CFG (defined by all
+    /// blocks dominated by the specified block, and that are in the current
+    /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+    /// visit definitions before uses, allowing us to hoist a loop body in one
+    /// pass without iteration.
+    ///
+    void HoistRegion(MachineDomTreeNode *N);
+
+    /// isLoadFromConstantMemory - Return true if the given instruction is a
+    /// load from constant memory.
+    bool isLoadFromConstantMemory(MachineInstr *MI);
+
+    /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
+    /// the load itself could be hoisted. Return the unfolded and hoistable
+    /// load, or null if the load couldn't be unfolded or if it wouldn't
+    /// be hoistable.
+    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+    /// LookForDuplicate - Find an instruction among PrevMIs that is a
+    /// duplicate of MI. Return this instruction if it's found.
+    const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+                                     std::vector<const MachineInstr*> &PrevMIs);
+
+    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+    /// the preheader that computes the same value. If one is found, replace
+    /// all uses of the hoisted instruction's definition with the definition of
+    /// the existing instruction rather than hoisting the instruction to the
+    /// preheader.
+    bool EliminateCSE(MachineInstr *MI,
+           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+    /// Hoist - When an instruction is found to use only loop invariant
+    /// operands that are safe to hoist, this instruction is called to do the
+    /// dirty work.
+    ///
+    void Hoist(MachineInstr *MI);
+
+    /// InitCSEMap - Initialize the CSE map with instructions that are in the
+    /// current loop preheader that may become duplicates of instructions that
+    /// are hoisted out of the loop.
+    void InitCSEMap(MachineBasicBlock *BB);
+
+    /// getCurPreheader - Get the preheader for the current loop, splitting
+    /// a critical edge if needed.
+    MachineBasicBlock *getCurPreheader();
+  };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+INITIALIZE_PASS(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false);
+
+FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
+  return new MachineLICM(PreRegAlloc);
+}
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+  // Check whether this loop even has a unique predecessor.
+  if (!CurLoop->getLoopPredecessor())
+    return false;
+  // Ok, now check to see if any of its outer loops do.
+  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+    if (L->getLoopPredecessor())
+      return false;
+  // None of them did, so this is the outermost with a unique predecessor.
+  return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+  if (PreRegAlloc)
+    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+  else
+    DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
+
+  Changed = FirstInLoop = false;
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  TRI = TM->getRegisterInfo();
+  MFI = MF.getFrameInfo();
+  RegInfo = &MF.getRegInfo();
+  AllocatableSet = TRI->getAllocatableSet(MF);
+
+  // Get our Loop information...
+  MLI = &getAnalysis<MachineLoopInfo>();
+  DT = &getAnalysis<MachineDominatorTree>();
+  AA = &getAnalysis<AliasAnalysis>();
+
+  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+  while (!Worklist.empty()) {
+    CurLoop = Worklist.pop_back_val();
+    CurPreheader = 0;
+
+    // If this is done before regalloc, only visit outer-most preheader-sporting
+    // loops.
+    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+      Worklist.append(CurLoop->begin(), CurLoop->end());
+      continue;
+    }
+
+    if (!PreRegAlloc)
+      HoistRegionPostRA();
+    else {
+      // CSEMap is initialized for loop header when the first instruction is
+      // being hoisted.
+      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+      FirstInLoop = true;
+      HoistRegion(N);
+      CSEMap.clear();
+    }
+  }
+
+  return Changed;
+}
+
+/// InstructionStoresToFI - Return true if instruction stores to the
+/// specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end(); o != oe; ++o) {
+    if (!(*o)->isStore() || !(*o)->getValue())
+      continue;
+    if (const FixedStackPseudoSourceValue *Value =
+        dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+      if (Value->getFrameIndex() == FI)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+                            unsigned *PhysRegDefs,
+                            SmallSet<int, 32> &StoredFIs,
+                            SmallVector<CandidateInfo, 32> &Candidates) {
+  bool RuledOut = false;
+  bool HasNonInvariantUse = false;
+  unsigned Def = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isFI()) {
+      // Remember if the instruction stores to the frame index.
+      int FI = MO.getIndex();
+      if (!StoredFIs.count(FI) &&
+          MFI->isSpillSlotObjectIndex(FI) &&
+          InstructionStoresToFI(MI, FI))
+        StoredFIs.insert(FI);
+      HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+           "Not expecting virtual register!");
+
+    if (!MO.isDef()) {
+      if (Reg && PhysRegDefs[Reg])
+        // If it's using a non-loop-invariant register, then it's obviously not
+        // safe to hoist.
+        HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (MO.isImplicit()) {
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+      if (!MO.isDead())
+        // Non-dead implicit def? This cannot be hoisted.
+        RuledOut = true;
+      // No need to check if a dead implicit def is also defined by
+      // another instruction.
+      continue;
+    }
+
+    // FIXME: For now, avoid instructions with multiple defs, unless
+    // it's a dead implicit def.
+    if (Def)
+      RuledOut = true;
+    else
+      Def = Reg;
+
+    // If we have already seen another instruction that defines the same
+    // register, then this is not safe.
+    if (++PhysRegDefs[Reg] > 1)
+      // MI defined register is seen defined by another instruction in
+      // the loop, it cannot be a LICM candidate.
+      RuledOut = true;
+    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+      if (++PhysRegDefs[*AS] > 1)
+        RuledOut = true;
+  }
+
+  // Only consider reloads for now and remats which do not have register
+  // operands. FIXME: Consider unfold load folding instructions.
+  if (Def && !RuledOut) {
+    int FI = INT_MIN;
+    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+      Candidates.push_back(CandidateInfo(MI, Def, FI));
+  }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+  unsigned NumRegs = TRI->getNumRegs();
+  unsigned *PhysRegDefs = new unsigned[NumRegs];
+  std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+  SmallVector<CandidateInfo, 32> Candidates;
+  SmallSet<int, 32> StoredFIs;
+
+  // Walk the entire region, count number of defs for each register, and
+  // collect potential LICM candidates.
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    // Conservatively treat live-ins as an external def.
+    // FIXME: That means a reload that's reused in successor block(s) will not
+    // be LICM'ed.
+    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+           E = BB->livein_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+    }
+
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+    }
+  }
+
+  // Now evaluate whether the potential candidates qualify.
+  // 1. Check if the candidate defined register is defined by another
+  //    instruction in the loop.
+  // 2. If the candidate is a load from stack slot (always true for now),
+  //    check if the slot is stored anywhere in the loop.
+  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+    if (Candidates[i].FI != INT_MIN &&
+        StoredFIs.count(Candidates[i].FI))
+      continue;
+
+    if (PhysRegDefs[Candidates[i].Def] == 1) {
+      bool Safe = true;
+      MachineInstr *MI = Candidates[i].MI;
+      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+        const MachineOperand &MO = MI->getOperand(j);
+        if (!MO.isReg() || MO.isDef() || !MO.getReg())
+          continue;
+        if (PhysRegDefs[MO.getReg()]) {
+          // If it's using a non-loop-invariant register, then it's obviously
+          // not safe to hoist.
+          Safe = false;
+          break;
+        }
+      }
+      if (Safe)
+        HoistPostRA(MI, Candidates[i].Def);
+    }
+  }
+
+  delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
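+/// (Kill flags on uses of 'Reg' are cleared because the hoisted value must
+/// now stay live across every iteration.)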
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    if (!BB->isLiveIn(Reg))
+      BB->addLiveIn(Reg);
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+          MO.setIsKill(false);
+      }
+    }
+  }
+}
+
+/// HoistPostRA - When an instruction is found to use only loop invariant
+/// operands that are safe to hoist, this instruction is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader) return;
+
+  // Now move the instruction to the predecessor, inserting it before any
+  // terminator instructions.
+  DEBUG({
+      dbgs() << "Hoisting " << *MI;
+      if (Preheader->getBasicBlock())
+        dbgs() << " to MachineBasicBlock "
+               << Preheader->getName();
+      if (MI->getParent()->getBasicBlock())
+        dbgs() << " from MachineBasicBlock "
+               << MI->getParent()->getName();
+      dbgs() << "\n";
+    });
+
+  // Splice the instruction to the preheader.
+  MachineBasicBlock *MBB = MI->getParent();
+  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+  // Add register to livein list to all the BBs in the current loop since a
+  // loop invariant must be kept live throughout the whole loop. This is
+  // important to ensure later passes do not scavenge the def register.
+  AddToLiveIns(Def);
+
+  ++NumPostRAHoisted;
+  Changed = true;
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+  assert(N != 0 && "Null dominator tree node?");
+  MachineBasicBlock *BB = N->getBlock();
+
+  // If this subregion is not in the top level loop at all, exit.
+  if (!CurLoop->contains(BB)) return;
+
+  for (MachineBasicBlock::iterator
+         MII = BB->begin(), E = BB->end(); MII != E; ) {
+    MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+    Hoist(&*MII);
+    MII = NextMII;
+  }
+
+  // Don't hoist things out of a large switch statement. This often causes
+  // code to be hoisted that wasn't going to be executed, and increases
+  // register pressure in a situation where it's likely to matter.
+  if (BB->succ_size() < 25) {
+    const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+    for (unsigned I = 0, E = Children.size(); I != E; ++I)
+      HoistRegion(Children[I]);
+  }
+}
+
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+  // Check if it's safe to move the instruction.
+  bool DontMoveAcrossStore = true;
+  if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+    return false;
+
+  return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. 
I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+  if (!IsLICMCandidate(I))
+    return false;
+
+  // The instruction is loop invariant if all of its operands are.
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = I.getOperand(i);
+
+    if (!MO.isReg())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    // Don't hoist an instruction that uses or defines a physical register.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        if (!RegInfo->def_empty(Reg))
+          return false;
+        if (AllocatableSet.test(Reg))
+          return false;
+        // Check for a def among the register's aliases too.
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          if (!RegInfo->def_empty(AliasReg))
+            return false;
+          if (AllocatableSet.test(AliasReg))
+            return false;
+        }
+        // Otherwise it's safe to move.
+        continue;
+      } else if (!MO.isDead()) {
+        // A def that isn't dead. We can't move it.
+        return false;
+      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+        // If the reg is live into the loop, we can't hoist an instruction
+        // which would clobber it.
+        return false;
+      }
+    }
+
+    if (!MO.isUse())
+      continue;
+
+    assert(RegInfo->getVRegDef(Reg) &&
+           "Machine instr not mapped for this vreg?!");
+
+    // If the loop contains the definition of an operand, then the instruction
+    // isn't loop invariant.
+    if (CurLoop->contains(RegInfo->getVRegDef(Reg)))
+      return false;
+  }
+
+  // If we got this far, the instruction is loop invariant!
+  return true;
+}
+
+
+/// HasPHIUses - Return true if the specified register has any PHI use.
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
+  for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
+         UE = RegInfo->use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (UseMI->isPHI())
+      return true;
+  }
+  return false;
+}
+
+/// isLoadFromConstantMemory - Return true if the given instruction is a
+/// load from constant memory. Machine LICM will hoist these even if they are
+/// not re-materializable.
+bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
+  if (!MI->getDesc().mayLoad()) return false;
+  if (!MI->hasOneMemOperand()) return false;
+  MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (MMO->isVolatile()) return false;
+  if (!MMO->getValue()) return false;
+  const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
+  if (PSV) {
+    MachineFunction &MF = *MI->getParent()->getParent();
+    return PSV->isConstant(MF.getFrameInfo());
+  } else {
+    return AA->pointsToConstantMemory(MMO->getValue());
+  }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+  // FIXME: For now, only hoist re-materializable instructions. LICM will
+  // increase register pressure. We want to make sure it doesn't increase
+  // spilling.
+  // Also hoist loads from constant memory, e.g. loads from stubs, GOT. 
Hoisting
+  // these tends to help performance in low register pressure situations. The
+  // trade-off is that it may cause spills in high register pressure
+  // situations. Spilling will end up adding a store in the loop preheader,
+  // but the reload is no more expensive. A side benefit is that these loads
+  // are frequently CSE'ed.
+  if (!TII->isTriviallyReMaterializable(&MI, AA)) {
+    if (!isLoadFromConstantMemory(&MI))
+      return false;
+  }
+
+  // If the result(s) of this instruction are used by PHIs, then don't hoist
+  // it. The presence of joins makes it difficult for the current register
+  // allocator implementation to perform remat.
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    if (HasPHIUses(MO.getReg(), RegInfo))
+      return false;
+  }
+
+  return true;
+}
+
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+  // If not, we may be able to unfold a load and hoist that.
+  // First test whether the instruction is loading from an amenable
+  // memory location.
+  if (!isLoadFromConstantMemory(MI))
+    return 0;
+
+  // Next determine the register class for a temporary register.
+  unsigned LoadRegIndex;
+  unsigned NewOpc =
+    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+                                    /*UnfoldLoad=*/true,
+                                    /*UnfoldStore=*/false,
+                                    &LoadRegIndex);
+  if (NewOpc == 0) return 0;
+  const TargetInstrDesc &TID = TII->get(NewOpc);
+  if (TID.getNumDefs() != 1) return 0;
+  const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+  // Ok, we're unfolding. Create a temporary register and do the unfold.
+  unsigned Reg = RegInfo->createVirtualRegister(RC);
+
+  MachineFunction &MF = *MI->getParent()->getParent();
+  SmallVector<MachineInstr *, 2> NewMIs;
+  bool Success =
+    TII->unfoldMemoryOperand(MF, MI, Reg,
+                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                             NewMIs);
+  (void)Success;
+  assert(Success &&
+         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+         "succeeded!");
+  assert(NewMIs.size() == 2 &&
+         "Unfolded a load into multiple instructions!");
+  MachineBasicBlock *MBB = MI->getParent();
+  MBB->insert(MI, NewMIs[0]);
+  MBB->insert(MI, NewMIs[1]);
+  // If unfolding produced a load that wasn't loop-invariant or profitable to
+  // hoist, discard the new instructions and bail.
+  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+    NewMIs[0]->eraseFromParent();
+    NewMIs[1]->eraseFromParent();
+    return 0;
+  }
+  // Otherwise we successfully unfolded a load that we can hoist.
+  MI->eraseFromParent();
+  return NewMIs[0];
+}
+
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+  for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+    const MachineInstr *MI = &*I;
+    // FIXME: For now, only hoist re-materializable instructions. LICM will
+    // increase register pressure. We want to make sure it doesn't increase
+    // spilling.
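+    // Record rematerializable instructions already sitting in the preheader
+    // so that an instruction hoisted later can be CSE'd against them instead
+    // of being duplicated.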
+    if (TII->isTriviallyReMaterializable(MI, AA)) {
+      unsigned Opcode = MI->getOpcode();
+      DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+        CI = CSEMap.find(Opcode);
+      if (CI != CSEMap.end())
+        CI->second.push_back(MI);
+      else {
+        std::vector<const MachineInstr*> CSEMIs;
+        CSEMIs.push_back(MI);
+        CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+      }
+    }
+  }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+                              std::vector<const MachineInstr*> &PrevMIs) {
+  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+    const MachineInstr *PrevMI = PrevMIs[i];
+    if (TII->produceSameValue(MI, PrevMI))
+      return PrevMI;
+  }
+  return 0;
+}
+
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+         DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+  // the undef property onto uses.
+  if (CI == CSEMap.end() || MI->isImplicitDef())
+    return false;
+
+  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+    // Replace virtual registers defined by MI by their counterparts defined
+    // by Dup.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+
+      // Physical registers may not differ here.
+      assert((!MO.isReg() || MO.getReg() == 0 ||
+              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+              MO.getReg() == Dup->getOperand(i).getReg()) &&
+             "Instructions with different phys regs are not identical!");
+
+      if (MO.isReg() && MO.isDef() &&
+          !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+        RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+      }
+    }
+    MI->eraseFromParent();
+    ++NumCSEed;
+    return true;
+  }
+  return false;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+void MachineLICM::Hoist(MachineInstr *MI) {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader) return;
+
+  // First check whether we should hoist this instruction.
+  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+    // If not, try unfolding a hoistable load.
+    MI = ExtractHoistableLoad(MI);
+    if (!MI) return;
+  }
+
+  // Now move the instruction to the preheader, inserting it before any
+  // terminator instructions.
+  DEBUG({
+      dbgs() << "Hoisting " << *MI;
+      if (Preheader->getBasicBlock())
+        dbgs() << " to MachineBasicBlock "
+               << Preheader->getName();
+      if (MI->getParent()->getBasicBlock())
+        dbgs() << " from MachineBasicBlock "
+               << MI->getParent()->getName();
+      dbgs() << "\n";
+    });
+
+  // If this is the first instruction being hoisted to the preheader,
+  // initialize the CSE map with potential common expressions.
+  if (FirstInLoop) {
+    InitCSEMap(Preheader);
+    FirstInLoop = false;
+  }
+
+  // Look for an opportunity to CSE the hoisted instruction.
+  unsigned Opcode = MI->getOpcode();
+  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+    CI = CSEMap.find(Opcode);
+  if (!EliminateCSE(MI, CI)) {
+    // Otherwise, splice the instruction to the preheader.
+    Preheader->splice(Preheader->getFirstTerminator(), MI->getParent(), MI);
+
+    // Clear the kill flags of any register this instruction defines,
+    // since they may need to be live throughout the entire loop
+    // rather than just live for part of it.
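+    // For example, if a use of the hoisted def inside the loop was marked
+    // <kill>, that flag is now stale: the value is defined once in the
+    // preheader and must remain live across the back edge on every iteration.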
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isDead()) + RegInfo->clearKillFlags(MO.getReg()); + } + + // Add to the CSE map. + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else { + std::vector<const MachineInstr*> CSEMIs; + CSEMIs.push_back(MI); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); + } + } + + ++NumHoisted; + Changed = true; +} + +MachineBasicBlock *MachineLICM::getCurPreheader() { + // Determine the block to which to hoist instructions. If we can't find a + // suitable loop predecessor, we can't do any hoisting. + + // If we've tried to get a preheader and failed, don't try again. + if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) + return 0; + + if (!CurPreheader) { + CurPreheader = CurLoop->getLoopPreheader(); + if (!CurPreheader) { + MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); + if (!Pred) { + CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); + return 0; + } + + CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); + if (!CurPreheader) { + CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); + return 0; + } + } + } + return CurPreheader; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp new file mode 100644 index 0000000..bca4b0c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -0,0 +1,80 @@ +//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachineLoopInfo class that is used to identify natural +// loops and determine the loop depth of various nodes of the CFG. Note that +// the loops identified may actually be several natural loops that share the +// same header node... not just a single natural loop. 
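+// (In particular, two or more back edges sharing one header are reported as
+// a single MachineLoop.)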
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +namespace llvm { +#define MLB class LoopBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLB); +#undef MLB +#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLIB); +#undef MLIB +} + +char MachineLoopInfo::ID = 0; +INITIALIZE_PASS(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true); + +char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; + +bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update + return false; +} + +void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineBasicBlock *MachineLoop::getTopBlock() { + MachineBasicBlock *TopMBB = getHeader(); + MachineFunction::iterator Begin = TopMBB->getParent()->begin(); + if (TopMBB != Begin) { + MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB)); + while (contains(PriorMBB)) { + TopMBB = PriorMBB; + if (TopMBB == Begin) break; + PriorMBB = prior(MachineFunction::iterator(TopMBB)); + } + } + return TopMBB; +} + +MachineBasicBlock *MachineLoop::getBottomBlock() { + MachineBasicBlock *BotMBB = getHeader(); + MachineFunction::iterator End = BotMBB->getParent()->end(); + if (BotMBB != prior(End)) { + MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + while (contains(NextMBB)) { + BotMBB = NextMBB; + if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break; + NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + } + } + return BotMBB; +} + +void MachineLoop::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp new file mode 100644 index 0000000..b647a4d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -0,0 +1,581 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfo.h" + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; +using namespace llvm::dwarf; + +// Handle the Pass registration stuff necessary to use TargetData's. +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false); +char MachineModuleInfo::ID = 0; + +// Out of line virtual method. 
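+// Defining the virtual destructor here, out of line, anchors the vtable for
+// MachineModuleInfoImpl in this translation unit rather than emitting a copy
+// of it in every file that uses the class.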
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {} + +namespace llvm { +class MMIAddrLabelMapCallbackPtr : CallbackVH { + MMIAddrLabelMap *Map; +public: + MMIAddrLabelMapCallbackPtr() : Map(0) {} + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} + + void setPtr(BasicBlock *BB) { + ValueHandleBase::operator=(BB); + } + + void setMap(MMIAddrLabelMap *map) { Map = map; } + + virtual void deleted(); + virtual void allUsesReplacedWith(Value *V2); +}; + +class MMIAddrLabelMap { + MCContext &Context; + struct AddrLabelSymEntry { + /// Symbols - The symbols for the label. This is a pointer union that is + /// either one symbol (the common case) or a list of symbols. + PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols; + + Function *Fn; // The containing function of the BasicBlock. + unsigned Index; // The index in BBCallbacks for the BasicBlock. + }; + + DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; + + /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We + /// use this so we get notified if a block is deleted or RAUWd. + std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; + + /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols + /// whose corresponding BasicBlock got deleted. These symbols need to be + /// emitted at some point in the file, so AsmPrinter emits them after the + /// function body. + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > + DeletedAddrLabelsNeedingEmission; +public: + + MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { + assert(DeletedAddrLabelsNeedingEmission.empty() && + "Some labels for deleted blocks never got emitted"); + + // Deallocate any of the 'list of symbols' case. + for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator + I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I) + if (I->second.Symbols.is<std::vector<MCSymbol*>*>()) + delete I->second.Symbols.get<std::vector<MCSymbol*>*>(); + } + + MCSymbol *getAddrLabelSymbol(BasicBlock *BB); + std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB); + + void takeDeletedSymbolsForFunction(Function *F, + std::vector<MCSymbol*> &Result); + + void UpdateForDeletedBlock(BasicBlock *BB); + void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); +}; +} + +MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + // If we already had an entry for this block, just return it. + if (!Entry.Symbols.isNull()) { + assert(BB->getParent() == Entry.Fn && "Parent changed"); + if (Entry.Symbols.is<MCSymbol*>()) + return Entry.Symbols.get<MCSymbol*>(); + return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0]; + } + + // Otherwise, this is a new entry, create a new symbol for it and add an + // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. 
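+  // (MMIAddrLabelMapCallbackPtr is a CallbackVH, so its deleted() and
+  // allUsesReplacedWith() hooks above forward those events into
+  // UpdateForDeletedBlock() and UpdateForRAUWBlock() on this map.)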
+ BBCallbacks.push_back(BB); + BBCallbacks.back().setMap(this); + Entry.Index = BBCallbacks.size()-1; + Entry.Fn = BB->getParent(); + MCSymbol *Result = Context.CreateTempSymbol(); + Entry.Symbols = Result; + return Result; +} + +std::vector<MCSymbol*> +MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + std::vector<MCSymbol*> Result; + + // If we already had an entry for this block, just return it. + if (Entry.Symbols.isNull()) + Result.push_back(getAddrLabelSymbol(BB)); + else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) + Result.push_back(Sym); + else + Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>(); + return Result; +} + + +/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return +/// them. +void MMIAddrLabelMap:: +takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I = + DeletedAddrLabelsNeedingEmission.find(F); + + // If there are no entries for the function, just return. + if (I == DeletedAddrLabelsNeedingEmission.end()) return; + + // Otherwise, take the list. + std::swap(Result, I->second); + DeletedAddrLabelsNeedingEmission.erase(I); +} + + +void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { + // If the block got deleted, there is no need for the symbol. If the symbol + // was already emitted, we can just forget about it, otherwise we need to + // queue it up for later emission when the function is output. + AddrLabelSymEntry Entry = AddrLabelSymbols[BB]; + AddrLabelSymbols.erase(BB); + assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?"); + BBCallbacks[Entry.Index] = 0; // Clear the callback. + + assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) && + "Block/parent mismatch"); + + // Handle both the single and the multiple symbols cases. + if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) { + if (Sym->isDefined()) + return; + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } else { + std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>(); + + for (unsigned i = 0, e = Syms->size(); i != e; ++i) { + MCSymbol *Sym = (*Syms)[i]; + if (Sym->isDefined()) continue; // Ignore already emitted labels. + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from + // 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } + + // The entry is deleted, free the memory associated with the symbol list. + delete Syms; + } +} + +void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { + // Get the entry for the RAUW'd block and remove it from our map. 
+ AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old]; + AddrLabelSymbols.erase(Old); + assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?"); + + AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; + + // If New is not address taken, just move our symbol over to it. + if (NewEntry.Symbols.isNull()) { + BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. + NewEntry = OldEntry; // Set New's entry. + return; + } + + BBCallbacks[OldEntry.Index] = 0; // Update the callback. + + // Otherwise, we need to add the old symbol to the new block's set. If it is + // just a single entry, upgrade it to a symbol list. + if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) { + std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>(); + SymList->push_back(PrevSym); + NewEntry.Symbols = SymList; + } + + std::vector<MCSymbol*> *SymList = + NewEntry.Symbols.get<std::vector<MCSymbol*>*>(); + + // If the old entry was a single symbol, add it. + if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) { + SymList->push_back(Sym); + return; + } + + // Otherwise, concatenate the list. + std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>(); + SymList->insert(SymList->end(), Syms->begin(), Syms->end()); + delete Syms; +} + + +void MMIAddrLabelMapCallbackPtr::deleted() { + Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); +} + +void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { + Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); +} + + +//===----------------------------------------------------------------------===// + +MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) +: ImmutablePass(ID), Context(MAI), + ObjFileMMI(0), + CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){ + // Always emit some info, by default "no personality" info. + Personalities.push_back(NULL); + AddrLabelSymbols = 0; + TheModule = 0; +} + +MachineModuleInfo::MachineModuleInfo() +: ImmutablePass(ID), Context(*(MCAsmInfo*)0) { + assert(0 && "This MachineModuleInfo constructor should never be called, MMI " + "should always be explicitly constructed by LLVMTargetMachine"); + abort(); +} + +MachineModuleInfo::~MachineModuleInfo() { + delete ObjFileMMI; + + // FIXME: Why isn't doFinalization being called?? + //assert(AddrLabelSymbols == 0 && "doFinalization not called"); + delete AddrLabelSymbols; + AddrLabelSymbols = 0; +} + +/// doInitialization - Initialize the state for a new module. +/// +bool MachineModuleInfo::doInitialization() { + assert(AddrLabelSymbols == 0 && "Improperly initialized"); + return false; +} + +/// doFinalization - Tear down the state after completion of a module. +/// +bool MachineModuleInfo::doFinalization() { + delete AddrLabelSymbols; + AddrLabelSymbols = 0; + return false; +} + +/// EndFunction - Discard function meta information. +/// +void MachineModuleInfo::EndFunction() { + // Clean up frame info. + FrameMoves.clear(); + + // Clean up exception info. + LandingPads.clear(); + CallSiteMap.clear(); + TypeInfos.clear(); + FilterIds.clear(); + FilterEnds.clear(); + CallsEHReturn = 0; + CallsUnwindInit = 0; + VariableDbgInfo.clear(); +} + +/// AnalyzeModule - Scan the module for global debug information. +/// +void MachineModuleInfo::AnalyzeModule(const Module &M) { + // Insert functions in the llvm.used array (but not llvm.compiler.used) into + // UsedFunctions. 
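+  // An illustrative initializer (the function name @foo is hypothetical):
+  //   @llvm.used = appending global [1 x i8*]
+  //     [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata"
+  // stripPointerCasts() below looks through the i8* bitcast to recover @foo.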
+  const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+  if (!GV || !GV->hasInitializer()) return;
+
+  // Should be an array of 'i8*'.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (InitList == 0) return;
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (const Function *F =
+          dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+      UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken.  This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+  return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken.  If other blocks were RAUW'd to
+/// this one, we may have to emit them as well; return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+  return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it.  This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+                              std::vector<MCSymbol*> &Result) {
+  // If no blocks have had their addresses taken, we're done.
+  if (AddrLabelSymbols == 0) return;
+  return AddrLabelSymbols->
+     takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+    (MachineBasicBlock *LandingPad) {
+  unsigned N = LandingPads.size();
+  for (unsigned i = 0; i < N; ++i) {
+    LandingPadInfo &LP = LandingPads[i];
+    if (LP.LandingPadBlock == LandingPad)
+      return LP;
+  }
+
+  LandingPads.push_back(LandingPadInfo(LandingPad));
+  return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+                                  MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.BeginLabels.push_back(BeginLabel);
+  LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+  MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.LandingPadLabel = LandingPadLabel;
+  return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+                                       const Function *Personality) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.Personality = Personality;
+
+  for (unsigned i = 0; i < Personalities.size(); ++i)
+    if (Personalities[i] == Personality)
+      return;
+
+  // If this is the first personality we're adding, go ahead and add it at
+  // the beginning.
+  if (Personalities[0] == NULL)
+    Personalities[0] = Personality;
+  else
+    Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+                                 std::vector<const GlobalVariable *> &TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  for (unsigned N = TyInfo.size(); N; --N)
+    LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+                                 std::vector<const GlobalVariable *> &TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  std::vector<unsigned> IdsInFilter(TyInfo.size());
+  for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+    IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+  LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+  for (unsigned i = 0; i != LandingPads.size(); ) {
+    LandingPadInfo &LandingPad = LandingPads[i];
+    if (LandingPad.LandingPadLabel &&
+        !LandingPad.LandingPadLabel->isDefined() &&
+        (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+      LandingPad.LandingPadLabel = 0;
+
+    // Special case: we *should* emit LPs with null LP MBB. This indicates
+    // the "nounwind" case.
+    if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+
+    for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+      MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+      MCSymbol *EndLabel = LandingPad.EndLabels[j];
+      if ((BeginLabel->isDefined() ||
+           (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+          (EndLabel->isDefined() ||
+           (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+      LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+      LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+      --j, --e;
+    }
+
+    // Remove landing pads with no try-ranges.
+    if (LandingPads[i].BeginLabels.empty()) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+
+    // If there is no landing pad, ensure that the list of typeids is empty.
+    // If the only typeid is a cleanup, this is the same as having no typeids.
+    if (!LandingPad.LandingPadBlock ||
+        (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+      LandingPad.TypeIds.clear();
+    ++i;
+  }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo.  This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+  for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+    if (TypeInfos[i] == TI) return i + 1;
+
+  TypeInfos.push_back(TI);
+  return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos.  This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+  // If the new filter coincides with the tail of an existing filter, then
+  // re-use the existing filter.  Folding filters more than this requires
+  // re-ordering filters and/or their elements - probably not worth it.
+  for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+       E = FilterEnds.end(); I != E; ++I) {
+    unsigned i = *I, j = TyIds.size();
+
+    while (i && j)
+      if (FilterIds[--i] != TyIds[--j])
+        goto try_next;
+
+    if (!j)
+      // The new filter coincides with range [i, end) of the existing filter.
+      return -(1 + i);
+
+try_next:;
+  }
+
+  // Add the new filter.
+  int FilterID = -(1 + FilterIds.size());
+  FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+  for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+    FilterIds.push_back(TyIds[I]);
+  FilterEnds.push_back(FilterIds.size());
+  FilterIds.push_back(0); // terminator
+  return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+  // FIXME: Until PR1414 is fixed, we're using one personality function per
+  // function.
+  return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return the unique index for the current personality
+/// function.  The NULL/first personality function should always get index
+/// zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+  const Function* Personality = NULL;
+
+  // Scan landing pads.  If there is at least one non-NULL personality, use it.
+  for (unsigned i = 0; i != LandingPads.size(); ++i)
+    if (LandingPads[i].Personality) {
+      Personality = LandingPads[i].Personality;
+      break;
+    }
+
+  for (unsigned i = 0; i < Personalities.size(); ++i) {
+    if (Personalities[i] == Personality)
+      return i;
+  }
+
+  // This will happen if the current personality function is
+  // at index zero.
+  return 0;
+}
+
+namespace {
+  /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
+  /// by source location, to avoid depending on the arbitrary order that
+  /// instruction selection visits variables in.
+  struct VariableDebugSorter {
+    bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
+                    const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
+                  const {
+      if (A.second.second.getLine() != B.second.second.getLine())
+        return A.second.second.getLine() < B.second.second.getLine();
+      if (A.second.second.getCol() != B.second.second.getCol())
+        return A.second.second.getCol() < B.second.second.getCol();
+      return false;
+    }
+  };
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 0000000..5ab56c0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::Anchor() {}
+void MachineModuleInfoELF::Anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+  typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+  const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+  const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+  return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+                                      MachineModuleInfoImpl::StubValueTy> &Map) {
+  MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+  if (!List.empty())
+    qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+  return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..9f4ef12
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register
+// allocators and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+  Node->setNext(List);
+  List = Node;
+  if (Listener) Listener->NotifyAdd(Node->getName(),
+                                    Node->getCtor(),
+                                    Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+  for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+    if (*I == Node) {
+      if (Listener) Listener->NotifyRemove(Node->getName());
+      *I = (*I)->getNext();
+      break;
+    }
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 0000000..5d852f2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,224 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { + VRegInfo.reserve(256); + RegAllocHints.reserve(256); + RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()]; + UsedPhysRegs.resize(TRI.getNumRegs()); + + // Create the physreg use/def lists. + PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; + memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); +} + +MachineRegisterInfo::~MachineRegisterInfo() { +#ifndef NDEBUG + for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) + assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?"); + for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) + assert(!PhysRegUseDefLists[i] && + "PhysRegUseDefLists has entries after all instructions are deleted"); +#endif + delete [] PhysRegUseDefLists; + delete [] RegClass2VRegMap; +} + +/// setRegClass - Set the register class of the specified virtual register. +/// +void +MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + unsigned VR = Reg; + Reg -= TargetRegisterInfo::FirstVirtualRegister; + assert(Reg < VRegInfo.size() && "Invalid vreg!"); + const TargetRegisterClass *OldRC = VRegInfo[Reg].first; + VRegInfo[Reg].first = RC; + + // Remove from old register class's vregs list. This may be slow but + // fortunately this operation is rarely needed. + std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; + std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR); + VRegs.erase(I); + + // Add to new register class's vregs list. + RegClass2VRegMap[RC->getID()].push_back(VR); +} + +/// createVirtualRegister - Create and return a new virtual register in the +/// function with the specified register class. +/// +unsigned +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ + assert(RegClass && "Cannot create register without RegClass!"); + // Add a reg, but keep track of whether the vector reallocated or not. + void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0]; + VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0)); + RegAllocHints.push_back(std::make_pair(0, 0)); + + if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1))) + // The vector reallocated, handle this now. + HandleVRegListReallocation(); + unsigned VR = getLastVirtReg(); + RegClass2VRegMap[RegClass->getID()].push_back(VR); + return VR; +} + +/// HandleVRegListReallocation - We just added a virtual register to the +/// VRegInfo info list and it reallocated. Update the use/def lists info +/// pointers. +void MachineRegisterInfo::HandleVRegListReallocation() { + // The back pointers for the vreg lists point into the previous vector. + // Update them to point to their correct slots. + for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) { + MachineOperand *List = VRegInfo[i].second; + if (!List) continue; + // Update the back-pointer to be accurate once more. + List->Contents.Reg.Prev = &VRegInfo[i].second; + } +} + +/// replaceRegWith - Replace all instances of FromReg with ToReg in the +/// machine function. This is like llvm-level X->replaceAllUsesWith(Y), +/// except that it also changes any definitions of the register as well. 
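+/// Implementation note: the loop below advances the use-list iterator before
+/// calling setReg(), because rewriting an operand relinks it onto ToReg's
+/// use/def list and would otherwise invalidate the iterator.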
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+  assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+  // TODO: This could be more efficient by bulk changing the operands.
+  for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+    MachineOperand &O = I.getOperand();
+    ++I;
+    O.setReg(ToReg);
+  }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found.  This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+  assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
+         "Invalid vreg!");
+  // Since we are in SSA form, we can use the first definition.
+  if (!def_empty(Reg))
+    return &*def_begin(Reg);
+  return 0;
+}
+
+bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
+  use_iterator UI = use_begin(RegNo);
+  if (UI == use_end())
+    return false;
+  return ++UI == use_end();
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+  use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+  if (UI == use_nodbg_end())
+    return false;
+  return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+  for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+    UI.getOperand().setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == Reg || I->second == Reg)
+      return true;
+  return false;
+}
+
+bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
+  for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+    if (*I == Reg)
+      return true;
+  return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->second == VReg)
+      return I->first;
+  return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == PReg)
+      return I->second;
+  return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII) {
+  // Emit the copies into the top of the block.
+  for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+    if (LiveIns[i].second) {
+      if (use_empty(LiveIns[i].second)) {
+        // The livein has no uses. Drop it.
+        //
+        // It would be preferable to have isel avoid creating live-in
+        // records for unused arguments in the first place, but it's
+        // complicated by the debug info code for arguments.
+        LiveIns.erase(LiveIns.begin() + i);
+        --i; --e;
+      } else {
+        // Emit a copy.
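+        // The copy has the form "%vreg = COPY %physreg", using the
+        // target-independent COPY opcode requested below, so the virtual
+        // register is initialized at the very top of the entry block.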
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), + TII.get(TargetOpcode::COPY), LiveIns[i].second) + .addReg(LiveIns[i].first); + + // Add the register to the entry block live-in set. + EntryMBB->addLiveIn(LiveIns[i].first); + } + } else { + // Add the register to the entry block live-in set. + EntryMBB->addLiveIn(LiveIns[i].first); + } +} + +void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { + for (int i = UsedPhysRegs.find_first(); i >= 0; + i = UsedPhysRegs.find_next(i)) + for (const unsigned *SS = TRI.getSubRegisters(i); + unsigned SubReg = *SS; ++SS) + if (SubReg > unsigned(i)) + UsedPhysRegs.set(SubReg); +} + +#ifndef NDEBUG +void MachineRegisterInfo::dumpUses(unsigned Reg) const { + for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) + I.getOperand().getParent()->dump(); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp new file mode 100644 index 0000000..84d6df2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -0,0 +1,372 @@ +//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MachineSSAUpdater class. It's based on SSAUpdater +// class in lib/Transforms/Utils. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" +using namespace llvm; + +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, + SmallVectorImpl<MachineInstr*> *NewPHI) + : AV(0), InsertedPHIs(NewPHI) { + TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); +} + +MachineSSAUpdater::~MachineSSAUpdater() { + delete &getAvailableVals(AV); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates. ProtoValue is the value used to name PHI nodes. +void MachineSSAUpdater::Initialize(unsigned V) { + if (AV == 0) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + + VR = V; + VRC = MRI->getRegClass(VR); +} + +/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for +/// the specified block. +bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. 
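+///
+/// A typical client looks roughly like this (a sketch; the names OldReg,
+/// NewReg, DefMBB and UseMO are hypothetical):
+///   MachineSSAUpdater SSAUpdate(MF);
+///   SSAUpdate.Initialize(OldReg);
+///   SSAUpdate.AddAvailableValue(DefMBB, NewReg);
+///   SSAUpdate.RewriteUse(UseMO);  // for each use operand of OldReg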
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+  return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+        SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+  if (BB->empty())
+    return 0;
+
+  MachineBasicBlock::iterator I = BB->front();
+  if (!I->isPHI())
+    return 0;
+
+  AvailableValsTy AVals;
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    AVals[PredValues[i].first] = PredValues[i].second;
+  while (I != BB->end() && I->isPHI()) {
+    bool Same = true;
+    for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+      unsigned SrcReg = I->getOperand(i).getReg();
+      MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+      if (AVals[SrcBB] != SrcReg) {
+        Same = false;
+        break;
+      }
+    }
+    if (Same)
+      return I->getOperand(0).getReg();
+    ++I;
+  }
+  return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which defines
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+                           MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+                           const TargetRegisterClass *RC,
+                           MachineRegisterInfo *MRI,
+                           const TargetInstrInfo *TII) {
+  unsigned NewVR = MRI->createVirtualRegister(RC);
+  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB.  Consider code like this:
+///
+///      X1 = ...
+///   SomeBB:
+///      use(X)
+///      X2 = ...
+///      br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks.  However, the use of X happens in the *middle* of
+/// a block.  Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!HasValueForBlock(BB))
+    return GetValueAtEndOfBlockInternal(BB);
+
+  // If there are no predecessors, just return undef.
+  if (BB->pred_empty()) {
+    // Insert an implicit_def to represent an undef value.
+    MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+                                        BB, BB->getFirstTerminator(),
+                                        VRC, MRI, TII);
+    return NewDef->getOperand(0).getReg();
+  }
+
+  // Otherwise, we have the hard case.  Get the live-in values for each
+  // predecessor.
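+  // Each pair records (predecessor, value live out of that predecessor).  If
+  // every predecessor happens to contribute the same value, the PHI below
+  // can be avoided entirely; that is what SingularValue tracks.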
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues; + unsigned SingularValue = 0; + + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // If an identical PHI is already in BB, just reuse it. + unsigned DupPHI = LookForIdenticalPHI(BB, PredValues); + if (DupPHI) + return DupPHI; + + // Otherwise, we do need a PHI: insert one now. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, + Loc, VRC, MRI, TII); + + // Fill in all the predecessors of the PHI. + MachineInstrBuilder MIB(InsertedPHI); + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI->getOperand(0).getReg(); +} + +static +MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, + MachineOperand *U) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + if (&MI->getOperand(i) == U) + return MI->getOperand(i+1).getMBB(); + } + + llvm_unreachable("MachineOperand::getParent() failure?"); + return 0; +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void MachineSSAUpdater::RewriteUse(MachineOperand &U) { + MachineInstr *UseMI = U.getParent(); + unsigned NewVR = 0; + if (UseMI->isPHI()) { + MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); + NewVR = GetValueAtEndOfBlockInternal(SourceBB); + } else { + NewVR = GetValueInMiddleOfBlock(UseMI->getParent()); + } + + U.setReg(NewVR); +} + +void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { + MRI->replaceRegWith(OldReg, NewReg); + + AvailableValsTy &AvailableVals = getAvailableVals(AV); + for (DenseMap<MachineBasicBlock*, unsigned>::iterator + I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I) + if (I->second == OldReg) + I->second = NewReg; +} + +/// MachinePHIiter - Iterator for PHI operands. This is used for the +/// PHI_iterator in the SSAUpdaterImpl template. 
+namespace { + class MachinePHIiter { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit MachinePHIiter(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + MachinePHIiter(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + MachinePHIiter &operator++() { idx += 2; return *this; } + bool operator==(const MachinePHIiter& x) const { return idx == x.idx; } + bool operator!=(const MachinePHIiter& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; +} + +/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl +/// template, specialized for MachineSSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<MachineSSAUpdater> { +public: + typedef MachineBasicBlock BlkT; + typedef unsigned ValT; + typedef MachineInstr PhiT; + + typedef MachineBasicBlock::succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } + + typedef MachinePHIiter PHI_iterator; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of BB into the Preds + /// vector. + static void FindPredecessorBlocks(MachineBasicBlock *BB, + SmallVectorImpl<MachineBasicBlock*> *Preds){ + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); + } + + /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. + /// Add it into the specified block and return the register. + static unsigned GetUndefVal(MachineBasicBlock *BB, + MachineSSAUpdater *Updater) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + Updater->VRC, Updater->MRI, + Updater->TII); + return NewDef->getOperand(0).getReg(); + } + + /// CreateEmptyPHI - Create a PHI instruction that defines a new register. + /// Add it into the specified block and return the register. + static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, + MachineSSAUpdater *Updater) { + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, + Updater->VRC, Updater->MRI, + Updater->TII); + return PHI->getOperand(0).getReg(); + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(MachineInstr *PHI, unsigned Val, + MachineBasicBlock *Pred) { + PHI->addOperand(MachineOperand::CreateReg(Val, false)); + PHI->addOperand(MachineOperand::CreateMBB(Pred)); + } + + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static MachineInstr *InstrIsPHI(MachineInstr *I) { + if (I && I->isPHI()) + return I; + return 0; + } + + /// ValueIsPHI - Check if the instruction that defines the specified register + /// is a PHI instruction. + static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) { + return InstrIsPHI(Updater->MRI->getVRegDef(Val)); + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. 
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) { + MachineInstr *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumOperands() <= 1) + return PHI; + return 0; + } + + /// GetPHIValue - For the specified PHI instruction, return the register + /// that it defines. + static unsigned GetPHIValue(MachineInstr *PHI) { + return PHI->getOperand(0).getReg(); + } +}; + +} // End llvm namespace + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ + AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (unsigned V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); +} diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp new file mode 100644 index 0000000..c8f8faf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -0,0 +1,445 @@ +//===-- MachineSink.cpp - Sinking for machine instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks when possible, so that +// they aren't executed on paths where their results aren't needed. +// +// This pass is not intended to be a replacement or a complete alternative +// for an LLVM-IR-level sinking pass. It is only designed to sink simple +// constructs that are not exposed before lowering and instruction selection. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-sink" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +SplitEdges("machine-sink-split", + cl::desc("Split critical edges during machine sinking"), + cl::init(false), cl::Hidden); +static cl::opt<unsigned> +SplitLimit("split-limit", + cl::init(~0u), cl::Hidden); + +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); + +namespace { + class MachineSinking : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *RegInfo; // Machine register information + MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *LI; + AliasAnalysis *AA; + BitVector AllocatableSet; // Which physregs are allocatable? 
+ + public: + static char ID; // Pass identification + MachineSinking() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + } + private: + bool ProcessBlock(MachineBasicBlock &MBB); + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From, + MachineBasicBlock *To); + bool SinkInstruction(MachineInstr *MI, bool &SawStore); + bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, bool &LocalUse) const; + }; +} // end anonymous namespace + +char MachineSinking::ID = 0; +INITIALIZE_PASS(MachineSinking, "machine-sink", + "Machine code sinking", false, false); + +FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } + +/// AllUsesDominatedByBlock - Return true if all uses of the specified register +/// occur in blocks dominated by the specified block. If any use is in the +/// definition block, then return false since it is never legal to move def +/// after uses. +bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &LocalUse) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Only makes sense for vregs"); + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (MachineRegisterInfo::use_nodbg_iterator + I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end(); + I != E; ++I) { + // Determine the block of the use. + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + + if (UseInst->isPHI()) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); + } else if (UseBlock == DefMBB) { + LocalUse = true; + return false; + } + + // Check that it dominates. + if (!DT->dominates(MBB, UseBlock)) + return false; + } + + return true; +} + +bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "******** Machine Sinking ********\n"); + + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + RegInfo = &MF.getRegInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<MachineLoopInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + AllocatableSet = TRI->getAllocatableSet(MF); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) + MadeChange |= ProcessBlock(*I); + + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + return EverMadeChange; +} + +bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { + // Can't sink anything out of a block that has less than two successors. + if (MBB.succ_size() <= 1 || MBB.empty()) return false; + + // Don't bother sinking code out of unreachable blocks. 
In addition to being + // unprofitable, it can also lead to infinite looping, because in an + // unreachable loop there may be nowhere to stop. + if (!DT->isReachableFromEntry(&MBB)) return false; + + bool MadeChange = false; + + // Walk the basic block bottom-up. Remember if we saw a store. + MachineBasicBlock::iterator I = MBB.end(); + --I; + bool ProcessedBegin, SawStore = false; + do { + MachineInstr *MI = I; // The instruction to sink. + + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == MBB.begin(); + if (!ProcessedBegin) + --I; + + if (MI->isDebugValue()) + continue; + + if (SinkInstruction(MI, SawStore)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB) { + // Avoid breaking back edge. From == To means backedge for single BB loop. + if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB) + return 0; + + // Check for more "complex" loops. + if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) || + !LI->isLoopHeader(ToBB)) { + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. + // It's only legal to break critical edge and sink the computation to the + // new block if all the predecessors of "To", except for "From", are + // not dominated by "From". Given SSA property, this means these + // predecessors are dominated by "To". + for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(), + E = ToBB->pred_end(); PI != E; ++PI) { + if (*PI == FromBB) + continue; + if (!DT->dominates(ToBB, *PI)) + return 0; + } + + // FIXME: Determine if it's cost effective to break this edge. + return FromBB->SplitCriticalEdge(ToBB, this); + } + + return 0; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Check if it's safe to move the instruction. + if (!MI->isSafeToMove(TII, AA, SawStore)) + return false; + + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + // Loop over all the operands of the specified instruction. If there is + // anything we can't handle, bail out. 
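+  //
+  // In outline, the decisions made below (a summary of the code that
+  // follows, not additional logic):
+  //
+  //   for each register operand MO of MI:
+  //     physreg use:  bail out unless the physreg and all of its aliases
+  //                   have no defs anywhere and are not allocatable
+  //     physreg def:  bail out unless the def is dead
+  //     vreg use:     always fine; uses do not constrain sinking
+  //     vreg def:     require some successor block that dominates all uses
+  //                   and remember it as SuccToSinkTo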
+ MachineBasicBlock *ParentBlock = MI->getParent(); + + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + MachineBasicBlock *SuccToSinkTo = 0; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; // Ignore non-register operands. + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + + if (AllocatableSet.test(Reg)) + return false; + + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + + if (AllocatableSet.test(AliasReg)) + return false; + } + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return false; + } + } else { + // Virtual register uses are always safe to sink. + if (MO.isUse()) continue; + + // If it's not safe to move defs of the register class, then abort. + if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) + return false; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // + // x = computation + // if () {} else {} + // use x + // + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Virtual register defs can only be sunk if all their uses are in blocks + // dominated by one of the successors. + if (SuccToSinkTo) { + // If a previous operand picked a block to sink to, then this operand + // must be sinkable to the same block. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse)) + return false; + + continue; + } + + // Otherwise, we should look at all the successors and decide which one + // we should sink to. + for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), + E = ParentBlock->succ_end(); SI != E; ++SI) { + bool LocalUse = false; + if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) { + SuccToSinkTo = *SI; + break; + } + if (LocalUse) + // Def is used locally, it's never safe to move this def. + return false; + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (SuccToSinkTo == 0) + return false; + } + } + + // If there are no outputs, it must have side-effects. + if (SuccToSinkTo == 0) + return false; + + // It's not safe to sink instructions to EH landing pad. Control flow into + // landing pad is implicitly defined. + if (SuccToSinkTo->isLandingPad()) + return false; + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MI->getParent() == SuccToSinkTo) + return false; + + // If the instruction to move defines a dead physical register which is live + // when leaving the basic block, don't move it because it could turn into a + // "zombie" define of that preg. E.g., EFLAGS. 
(<rdar://problem/8030636>)
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    if (SuccToSinkTo->isLiveIn(Reg))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+  // If the block has multiple predecessors, this would introduce computation
+  // on a path where it doesn't already exist. We could split the critical
+  // edge, but for now we just punt.
+  // FIXME: Split critical edges if not backedges.
+  if (SuccToSinkTo->pred_size() > 1) {
+    // We cannot sink a load across a critical edge - there may be stores in
+    // other code paths.
+    bool TryBreak = false;
+    bool store = true;
+    if (!MI->isSafeToMove(TII, AA, store)) {
+      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+      TryBreak = true;
+    }
+
+    // We don't want to sink across a critical edge if we don't dominate the
+    // successor. We could be introducing calculations to new code paths.
+    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+      TryBreak = true;
+    }
+
+    // Don't sink instructions into a loop.
+    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+      TryBreak = true;
+    }
+
+    // Otherwise we are OK with sinking along a critical edge.
+    if (!TryBreak)
+      DEBUG(dbgs() << "Sinking along critical edge.\n");
+    else {
+      MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo);
+      if (!NewSucc) {
+        DEBUG(dbgs() <<
+              " *** PUNTING: Not legal or profitable to break critical edge\n");
+        return false;
+      } else {
+        DEBUG(dbgs() << " *** Splitting critical edge:"
+              " BB#" << ParentBlock->getNumber()
+              << " -- BB#" << NewSucc->getNumber()
+              << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+        SuccToSinkTo = NewSucc;
+        ++NumSplit;
+      }
+    }
+  }
+
+  // Determine where to insert into. Skip phi nodes.
+  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+    ++InsertPos;
+
+  // Move the instruction.
+  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+                       ++MachineBasicBlock::iterator(MI));
+
+  // Conservatively, clear any kill flags, since it's possible that they are no
+  // longer correct.
+  MI->clearKillInfo();
+
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..1e88562
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,933 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
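+//
+// When a check fails, the verifier prints a report assembled by the report()
+// routines below; schematically (illustrative values):
+//
+//   *** Bad machine code: Too few operands ***
+//   - function: foo
+//   - basic block: entry 0x... (BB#0)
+//   - instruction: ...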
+// +// The machine code verifier is enabled from LLVMTargetMachine.cpp with the +// command-line option -verify-machineinstrs, or by defining the environment +// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive +// the verifier errors. +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + struct MachineVerifier { + + MachineVerifier(Pass *pass) : + PASS(pass), + OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + {} + + bool runOnMachineFunction(MachineFunction &MF); + + Pass *const PASS; + const char *const OutFileName; + raw_ostream *OS; + const MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + + unsigned foundErrors; + + typedef SmallVector<unsigned, 16> RegVector; + typedef DenseSet<unsigned> RegSet; + typedef DenseMap<unsigned, const MachineInstr*> RegMap; + + BitVector regsReserved; + RegSet regsLive; + RegVector regsDefined, regsDead, regsKilled; + RegSet regsLiveInButUnused; + + // Add Reg and any sub-registers to RV + void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + RV.push_back(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + RV.push_back(*R); + } + + struct BBInfo { + // Is this MBB reachable from the MF entry point? + bool reachable; + + // Vregs that must be live in because they are used without being + // defined. Map value is the user. + RegMap vregsLiveIn; + + // Regs killed in MBB. They may be defined again, and will then be in both + // regsKilled and regsLiveOut. + RegSet regsKilled; + + // Regs defined in MBB and live out. Note that vregs passing through may + // be live out without being mentioned here. + RegSet regsLiveOut; + + // Vregs that pass through MBB untouched. This set is disjoint from + // regsKilled and regsLiveOut. + RegSet vregsPassed; + + // Vregs that must pass through MBB because they are needed by a successor + // block. This set is disjoint from regsLiveOut. + RegSet vregsRequired; + + BBInfo() : reachable(false) {} + + // Add register to vregsPassed if it belongs there. Return true if + // anything changed. + bool addPassed(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) + return false; + return vregsPassed.insert(Reg).second; + } + + // Same for a full set. + bool addPassed(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addPassed(*I)) + changed = true; + return changed; + } + + // Add register to vregsRequired if it belongs there. Return true if + // anything changed. 
+ bool addRequired(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsLiveOut.count(Reg)) + return false; + return vregsRequired.insert(Reg).second; + } + + // Same for a full set. + bool addRequired(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addRequired(*I)) + changed = true; + return changed; + } + + // Same for a full map. + bool addRequired(const RegMap &RM) { + bool changed = false; + for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I) + if (addRequired(I->first)) + changed = true; + return changed; + } + + // Live-out registers are either in regsLiveOut or vregsPassed. + bool isLiveOut(unsigned Reg) const { + return regsLiveOut.count(Reg) || vregsPassed.count(Reg); + } + }; + + // Extra register info per MBB. + DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; + + bool isReserved(unsigned Reg) { + return Reg < regsReserved.size() && regsReserved.test(Reg); + } + + // Analysis information if available + LiveVariables *LiveVars; + const LiveIntervals *LiveInts; + + void visitMachineFunctionBefore(); + void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); + void visitMachineInstrBefore(const MachineInstr *MI); + void visitMachineOperand(const MachineOperand *MO, unsigned MONum); + void visitMachineInstrAfter(const MachineInstr *MI); + void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); + void visitMachineFunctionAfter(); + + void report(const char *msg, const MachineFunction *MF); + void report(const char *msg, const MachineBasicBlock *MBB); + void report(const char *msg, const MachineInstr *MI); + void report(const char *msg, const MachineOperand *MO, unsigned MONum); + + void markReachable(const MachineBasicBlock *MBB); + void calcRegsPassed(); + void checkPHIOps(const MachineBasicBlock *MBB); + + void calcRegsRequired(); + void verifyLiveVariables(); + void verifyLiveIntervals(); + }; + + struct MachineVerifierPass : public MachineFunctionPass { + static char ID; // Pass ID, replacement for typeid + + MachineVerifierPass() + : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) { + MF.verify(this); + return false; + } + }; + +} + +char MachineVerifierPass::ID = 0; +INITIALIZE_PASS(MachineVerifierPass, "machineverifier", + "Verify generated machine code", false, false); + +FunctionPass *llvm::createMachineVerifierPass() { + return new MachineVerifierPass(); +} + +void MachineFunction::verify(Pass *p) const { + MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this)); +} + +bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { + raw_ostream *OutFile = 0; + if (OutFileName) { + std::string ErrorInfo; + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + raw_fd_ostream::F_Append); + if (!ErrorInfo.empty()) { + errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + exit(1); + } + + OS = OutFile; + } else { + OS = &errs(); + } + + foundErrors = 0; + + this->MF = &MF; + TM = &MF.getTarget(); + TRI = TM->getRegisterInfo(); + MRI = &MF.getRegInfo(); + + LiveVars = NULL; + LiveInts = NULL; + if (PASS) { + LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); + // We don't want to verify LiveVariables if LiveIntervals is available. 
+    if (!LiveInts)
+      LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+  }
+
+  visitMachineFunctionBefore();
+  for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+       MFI!=MFE; ++MFI) {
+    visitMachineBasicBlockBefore(MFI);
+    for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+           MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+      visitMachineInstrBefore(MBBI);
+      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+        visitMachineOperand(&MBBI->getOperand(I), I);
+      visitMachineInstrAfter(MBBI);
+    }
+    visitMachineBasicBlockAfter(MFI);
+  }
+  visitMachineFunctionAfter();
+
+  if (OutFile)
+    delete OutFile;
+  else if (foundErrors)
+    report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+  // Clean up.
+  regsLive.clear();
+  regsDefined.clear();
+  regsDead.clear();
+  regsKilled.clear();
+  regsLiveInButUnused.clear();
+  MBBInfoMap.clear();
+
+  return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+  assert(MF);
+  *OS << '\n';
+  if (!foundErrors++)
+    MF->print(*OS);
+  *OS << "*** Bad machine code: " << msg << " ***\n"
+      << "- function: " << MF->getFunction()->getNameStr() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+  assert(MBB);
+  report(msg, MBB->getParent());
+  *OS << "- basic block: " << MBB->getName()
+      << " " << (void*)MBB
+      << " (BB#" << MBB->getNumber() << ")\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+  assert(MI);
+  report(msg, MI->getParent());
+  *OS << "- instruction: ";
+  MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+                             const MachineOperand *MO, unsigned MONum) {
+  assert(MO);
+  report(msg, MO->getParent());
+  *OS << "- operand " << MONum << ": ";
+  MO->print(*OS, TM);
+  *OS << "\n";
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+  BBInfo &MInfo = MBBInfoMap[MBB];
+  if (!MInfo.reachable) {
+    MInfo.reachable = true;
+    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+           SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+      markReachable(*SuI);
+  }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+  regsReserved = TRI->getReservedRegs(*MF);
+
+  // A sub-register of a reserved register is also reserved
+  for (int Reg = regsReserved.find_first(); Reg>=0;
+       Reg = regsReserved.find_next(Reg)) {
+    for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+      // FIXME: This should probably be:
+      // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+      regsReserved.set(*Sub);
+    }
+  }
+  markReachable(&MF->front());
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+                      const MachineBasicBlock *a, const MachineBasicBlock *b) {
+  if (*i == a)
+    return *++i == b;
+  if (*i == b)
+    return *++i == a;
+  return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions to
+  // check.
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+                          TBB, FBB, Cond)) {
+    // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+    // check whether its answers match up with reality.
+    if (!TBB && !FBB) {
+      // Block falls through to its successor.
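+      //
+      // (For reference: the four TBB/FBB/Cond shapes handled below mirror the
+      // AnalyzeBranch contract as this function understands it:
+      //   TBB == 0, FBB == 0             -> block falls through
+      //   TBB != 0, FBB == 0, Cond empty -> unconditional branch to TBB
+      //   TBB != 0, FBB == 0, Cond set   -> conditional branch to TBB, else
+      //                                     fall through
+      //   TBB != 0, FBB != 0             -> conditional branch to TBB, else
+      //                                     branch to FBB.)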
+      MachineFunction::const_iterator MBBI = MBB;
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out the bottom of the function.
+      } else if (MBB->succ_empty()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out of the block.
+      } else if (MBB->succ_size() != 1) {
+        report("MBB exits via unconditional fall-through but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (MBB->succ_begin()[0] != MBBI) {
+        report("MBB exits via unconditional fall-through but its successor "
+               "differs from its CFG successor!", MBB);
+      }
+      if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+          !TII->isPredicated(&MBB->back())) {
+        report("MBB exits via unconditional fall-through but ends with a "
+               "barrier instruction!", MBB);
+      }
+      if (!Cond.empty()) {
+        report("MBB exits via unconditional fall-through but has a condition!",
+               MBB);
+      }
+    } else if (TBB && !FBB && Cond.empty()) {
+      // Block unconditionally branches somewhere.
+      if (MBB->succ_size() != 1) {
+        report("MBB exits via unconditional branch but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (MBB->succ_begin()[0] != TBB) {
+        report("MBB exits via unconditional branch but the CFG "
+               "successor doesn't match the actual successor!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via unconditional branch but doesn't contain "
+               "any instructions!", MBB);
+      } else if (!MBB->back().getDesc().isBarrier()) {
+        report("MBB exits via unconditional branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().getDesc().isTerminator()) {
+        report("MBB exits via unconditional branch but the branch isn't a "
+               "terminator instruction!", MBB);
+      }
+    } else if (TBB && !FBB && !Cond.empty()) {
+      // Block conditionally branches somewhere, otherwise falls through.
+      MachineFunction::const_iterator MBBI = MBB;
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        report("MBB conditionally falls through out of function!", MBB);
+      } else if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/fall-through but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+        report("MBB exits via conditional branch/fall-through but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/fall-through but doesn't "
+               "contain any instructions!", MBB);
+      } else if (MBB->back().getDesc().isBarrier()) {
+        report("MBB exits via conditional branch/fall-through but ends with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().getDesc().isTerminator()) {
+        report("MBB exits via conditional branch/fall-through but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+    } else if (TBB && FBB) {
+      // Block conditionally branches somewhere, otherwise branches
+      // somewhere else.
+      if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/branch but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+        report("MBB exits via conditional branch/branch but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/branch but doesn't "
+               "contain any instructions!", MBB);
+      } else if (!MBB->back().getDesc().isBarrier()) {
+        report("MBB exits via conditional branch/branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().getDesc().isTerminator()) {
+        report("MBB exits via conditional branch/branch but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+      if (Cond.empty()) {
+        report("MBB exits via conditional branch/branch but there's no "
+               "condition!", MBB);
+      }
+    } else {
+      report("AnalyzeBranch returned invalid data!", MBB);
+    }
+  }
+
+  regsLive.clear();
+  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+         E = MBB->livein_end(); I != E; ++I) {
+    if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+      report("MBB live-in list contains non-physical register", MBB);
+      continue;
+    }
+    regsLive.insert(*I);
+    for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+      regsLive.insert(*R);
+  }
+  regsLiveInButUnused = regsLive;
+
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  assert(MFI && "Function has no frame info");
+  BitVector PR = MFI->getPristineRegs(MBB);
+  for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+    regsLive.insert(I);
+    for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+      regsLive.insert(*R);
+  }
+
+  regsKilled.clear();
+  regsDefined.clear();
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+  const TargetInstrDesc &TI = MI->getDesc();
+  if (MI->getNumOperands() < TI.getNumOperands()) {
+    report("Too few operands", MI);
+    *OS << TI.getNumOperands() << " operands expected, but "
+        << MI->getNumExplicitOperands() << " given.\n";
+  }
+
+  // Check the MachineMemOperands for basic consistency.
+  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+         E = MI->memoperands_end(); I != E; ++I) {
+    if ((*I)->isLoad() && !TI.mayLoad())
+      report("Missing mayLoad flag", MI);
+    if ((*I)->isStore() && !TI.mayStore())
+      report("Missing mayStore flag", MI);
+  }
+
+  // Debug values must not have a slot index.
+  // Other instructions must have one.
+  if (LiveInts) {
+    bool mapped = !LiveInts->isNotInMIMap(MI);
+    if (MI->isDebugValue()) {
+      if (mapped)
+        report("Debug instruction has a slot index", MI);
+    } else {
+      if (!mapped)
+        report("Missing slot index", MI);
+    }
+  }
+
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+  const MachineInstr *MI = MO->getParent();
+  const TargetInstrDesc &TI = MI->getDesc();
+
+  // The first TI.NumDefs operands must be explicit register defines
+  if (MONum < TI.getNumDefs()) {
+    if (!MO->isReg())
+      report("Explicit definition must be a register", MO, MONum);
+    else if (!MO->isDef())
+      report("Explicit definition marked as use", MO, MONum);
+    else if (MO->isImplicit())
+      report("Explicit definition marked as implicit", MO, MONum);
+  } else if (MONum < TI.getNumOperands()) {
+    if (MO->isReg()) {
+      if (MO->isDef())
+        report("Explicit operand marked as def", MO, MONum);
+      if (MO->isImplicit())
+        report("Explicit operand marked as implicit", MO, MONum);
+    }
+  } else {
+    // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+    if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg())
+      report("Extra explicit operand on non-variadic instruction", MO, MONum);
+  }
+
+  switch (MO->getType()) {
+  case MachineOperand::MO_Register: {
+    const unsigned Reg = MO->getReg();
+    if (!Reg)
+      return;
+
+    // Check Live Variables.
+    if (MO->isUndef()) {
+      // An <undef> doesn't refer to any register, so just skip it.
+    } else if (MO->isUse()) {
+      regsLiveInButUnused.erase(Reg);
+
+      bool isKill = false;
+      unsigned defIdx;
+      if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+        // A two-addr use counts as a kill if use and def are the same.
+        unsigned DefReg = MI->getOperand(defIdx).getReg();
+        if (Reg == DefReg) {
+          isKill = true;
+          // And in that case an explicit kill flag is not allowed.
+          if (MO->isKill())
+            report("Illegal kill flag on two-address instruction operand",
+                   MO, MONum);
+        } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          report("Two-address instruction operands must be identical",
+                 MO, MONum);
+        }
+      } else
+        isKill = MO->isKill();
+
+      if (isKill)
+        addRegWithSubRegs(regsKilled, Reg);
+
+      // Check that LiveVars knows this kill.
+      if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+          MO->isKill()) {
+        LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+        if (std::find(VI.Kills.begin(),
+                      VI.Kills.end(), MI) == VI.Kills.end())
+          report("Kill missing from LiveVariables", MO, MONum);
+      }
+
+      // Check LiveInts liveness and kill.
+      if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+        SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+        if (LiveInts->hasInterval(Reg)) {
+          const LiveInterval &LI = LiveInts->getInterval(Reg);
+          if (!LI.liveAt(UseIdx)) {
+            report("No live range at use", MO, MONum);
+            *OS << UseIdx << " is not live in " << LI << '\n';
+          }
+          // TODO: Verify isKill == LI.killedAt.
+        } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+          report("Virtual register has no Live interval", MO, MONum);
+        }
+      }
+
+      // Use of a dead register.
+      if (!regsLive.count(Reg)) {
+        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          // Reserved registers may be used even when 'dead'.
+          if (!isReserved(Reg))
+            report("Using an undefined physical register", MO, MONum);
+        } else {
+          BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+          // We don't know which virtual registers are live in, so only complain
+          // if vreg was killed in this MBB. Otherwise keep track of vregs that
+          // must be live in. PHI instructions are handled separately.
+          if (MInfo.regsKilled.count(Reg))
+            report("Using a killed virtual register", MO, MONum);
+          else if (!MI->isPHI())
+            MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+        }
+      }
+    } else {
+      assert(MO->isDef());
+      // Register defined.
+      // TODO: verify that earlyclobber ops are not used.
+      if (MO->isDead())
+        addRegWithSubRegs(regsDead, Reg);
+      else
+        addRegWithSubRegs(regsDefined, Reg);
+
+      // Check LiveInts for a live range, but only for virtual registers.
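+      // (Note on slot indexes, as this code assumes them: within a single
+      // instruction the use slot orders before the def slot, so uses above
+      // are checked for liveness at getUseIndex() while new values below are
+      // expected to start at getDefIndex().)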
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) { + assert(LR->valno && "NULL valno is not allowed"); + if (LR->valno->def != DefIdx) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << LR->valno->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } + + // Check register classes. + if (MONum < TI.getNumOperands() && !MO->isImplicit()) { + const TargetOperandInfo &TOI = TI.OpInfo[MONum]; + unsigned SubIdx = MO->getSubReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + unsigned sr = Reg; + if (SubIdx) { + unsigned s = TRI->getSubReg(Reg, SubIdx); + if (!s) { + report("Invalid subregister index for physical register", + MO, MONum); + return; + } + sr = s; + } + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (!DRC->contains(sr)) { + report("Illegal physical register for instruction", MO, MONum); + *OS << TRI->getName(sr) << " is not a " + << DRC->getName() << " register.\n"; + } + } + } else { + // Virtual register. + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (SubIdx) { + const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx); + if (!SRC) { + report("Invalid subregister index for virtual register", MO, MONum); + *OS << "Register class " << RC->getName() + << " does not support subreg index " << SubIdx << "\n"; + return; + } + RC = SRC; + } + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (RC != DRC && !RC->hasSuperClass(DRC)) { + report("Illegal virtual register for instruction", MO, MONum); + *OS << "Expected a " << DRC->getName() << " register, but got a " + << RC->getName() << " register\n"; + } + } + } + } + break; + } + + case MachineOperand::MO_MachineBasicBlock: + if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); + break; + + default: + break; + } +} + +void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + set_union(MInfo.regsKilled, regsKilled); + set_subtract(regsLive, regsKilled); regsKilled.clear(); + set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); +} + +void +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { + MBBInfoMap[MBB].regsLiveOut = regsLive; + regsLive.clear(); +} + +// Calculate the largest possible vregsPassed sets. These are the registers that +// can pass through an MBB live, but may not be live every time. It is assumed +// that all vregsPassed sets are empty before the call. +void MachineVerifier::calcRegsPassed() { + // First push live-out regs to successors' vregsPassed. Remember the MBBs that + // have any vregsPassed. 
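+  //
+  // In outline this is an ordinary forward dataflow fixed point (a summary
+  // of the code below, not additional logic):
+  //
+  //   todo = successors that picked up new registers from a live-out push
+  //   while todo is not empty:
+  //     take MBB from todo
+  //     for each successor S of MBB (ignoring self loops):
+  //       if vregsPassed(S) grows when vregsPassed(MBB) is merged in:
+  //         add S to todo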
+ DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + if (!MInfo.reachable) + continue; + for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), + SuE = MBB.succ_end(); SuI != SuE; ++SuI) { + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.regsLiveOut)) + todo.insert(*SuI); + } + } + + // Iteratively push vregsPassed to successors. This will converge to the same + // final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) { + if (*SuI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.vregsPassed)) + todo.insert(*SuI); + } + } +} + +// Calculate the set of virtual registers that must be passed through each basic +// block in order to satisfy the requirements of successor blocks. This is very +// similar to calcRegsPassed, only backwards. +void MachineVerifier::calcRegsRequired() { + // First push live-in regs to predecessors' vregsRequired. + DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), + PrE = MBB.pred_end(); PrI != PrE; ++PrI) { + BBInfo &PInfo = MBBInfoMap[*PrI]; + if (PInfo.addRequired(MInfo.vregsLiveIn)) + todo.insert(*PrI); + } + } + + // Iteratively push vregsRequired to predecessors. This will converge to the + // same final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (*PrI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*PrI]; + if (SInfo.addRequired(MInfo.vregsRequired)) + todo.insert(*PrI); + } + } +} + +// Check PHI instructions at the beginning of MBB. It is assumed that +// calcRegsPassed has been run so BBInfo::isLiveOut is valid. +void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + DenseSet<const MachineBasicBlock*> seen; + + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB(); + if (!Pre->isSuccessor(MBB)) + continue; + seen.insert(Pre); + BBInfo &PrInfo = MBBInfoMap[Pre]; + if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) + report("PHI operand is not live-out from predecessor", + &BBI->getOperand(i), i); + } + + // Did we see all predecessors? 
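+    // (A machine PHI is laid out as "%dst = PHI %v1, <BB#1>, %v2, <BB#2>, ...":
+    // operand 0 is the def and each use comes paired with its predecessor
+    // block, which is why the scan above steps the operand index by two. The
+    // loop below checks the reverse direction: every CFG predecessor must
+    // have shown up in one of those pairs.)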
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (!seen.count(*PrI)) { + report("Missing PHI operand", BBI); + *OS << "BB#" << (*PrI)->getNumber() + << " is a predecessor according to the CFG.\n"; + } + } + } +} + +void MachineVerifier::visitMachineFunctionAfter() { + calcRegsPassed(); + + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Skip unreachable MBBs. + if (!MInfo.reachable) + continue; + + checkPHIOps(MFI); + } + + // Now check liveness info if available + if (LiveVars || LiveInts) + calcRegsRequired(); + if (LiveVars) + verifyLiveVariables(); + if (LiveInts) + verifyLiveIntervals(); +} + +void MachineVerifier::verifyLiveVariables() { + assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); + for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, + RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Our vregsRequired should be identical to LiveVariables' AliveBlocks + if (MInfo.vregsRequired.count(Reg)) { + if (!VI.AliveBlocks.test(MFI->getNumber())) { + report("LiveVariables: Block missing from AliveBlocks", MFI); + *OS << "Virtual register %reg" << Reg + << " must be live through the block.\n"; + } + } else { + if (VI.AliveBlocks.test(MFI->getNumber())) { + report("LiveVariables: Block should not be in AliveBlocks", MFI); + *OS << "Virtual register %reg" << Reg + << " is not needed live through the block.\n"; + } + } + } + } +} + +void MachineVerifier::verifyLiveIntervals() { + assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); + for (LiveIntervals::const_iterator LVI = LiveInts->begin(), + LVE = LiveInts->end(); LVI != LVE; ++LVI) { + const LiveInterval &LI = *LVI->second; + assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); + + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) { + VNInfo *VNI = *I; + const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def); + + if (!DefLR) { + if (!VNI->isUnused()) { + report("Valno not live at def and not marked unused", MF); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } + continue; + } + + if (VNI->isUnused()) + continue; + + if (DefLR->valno != VNI) { + report("Live range at def has different valno", MF); + DefLR->print(*OS); + *OS << " should use valno #" << VNI->id << " in " << LI << '\n'; + } + + } + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { + const LiveRange &LR = *I; + assert(LR.valno && "Live range has no valno"); + + if (LR.valno->id >= LI.getNumValNums() || + LR.valno != LI.getValNumInfo(LR.valno->id)) { + report("Foreign valno in live range", MF); + LR.print(*OS); + *OS << " has a valno not in " << LI << '\n'; + } + + if (LR.valno->isUnused()) { + report("Live range valno is marked unused", MF); + LR.print(*OS); + *OS << " in " << LI << '\n'; + } + + } + } +} + diff --git a/contrib/llvm/lib/CodeGen/Makefile b/contrib/llvm/lib/CodeGen/Makefile new file mode 100644 index 0000000..4ab3e3c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Makefile @@ -0,0 +1,22 @@ +##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University 
of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG AsmPrinter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory. Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp
new file mode 100644
index 0000000..cf05275
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp
@@ -0,0 +1,141 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+
+//===----------------------------------------------------------------------===//
+// ObjectCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
+ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
+ObjectCodeEmitter::~ObjectCodeEmitter() {}
+
+/// setBinaryObject - set the BinaryObject we are writing to
+void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
+
+/// emitByte - This callback is invoked when a byte needs to be
+/// written to the data stream, without buffer overflow testing.
+void ObjectCodeEmitter::emitByte(uint8_t B) {
+  BO->emitByte(B);
+}
+
+/// emitWordLE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitWordLE(uint32_t W) {
+  BO->emitWordLE(W);
+}
+
+/// emitWordBE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitWordBE(uint32_t W) {
+  BO->emitWordBE(W);
+}
+
+/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
+  BO->emitDWordLE(W);
+}
+
+/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
+  BO->emitDWordBE(W);
+}
+
+/// emitAlignment - Align 'BO' to the necessary alignment boundary.
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
+                                      uint8_t fill /* 0 */) {
+  BO->emitAlignment(Alignment, fill);
+}
+
+/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
+  BO->emitULEB128Bytes(Value);
+}
+
+/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+/// written to the data stream.
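+/// For example, -123456 encodes as the three bytes 0xC0 0xBB 0x78.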
+void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { + BO->emitSLEB128Bytes(Value); +} + +/// emitString - This callback is invoked when a String needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitString(const std::string &String) { + BO->emitString(String); +} + +/// getCurrentPCValue - This returns the address that the next emitted byte +/// will be output to. +uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { + return BO->getCurrentPCOffset(); +} + +/// getCurrentPCOffset - Return the offset from the start of the emitted +/// buffer that we are currently writing to. +uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { + return BO->getCurrentPCOffset(); +} + +/// addRelocation - Whenever a relocatable address is needed, it should be +/// noted with this interface. +void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { + BO->addRelocation(relocation); +} + +/// StartMachineBasicBlock - This should be called by the target when a new +/// basic block is about to be emitted. This way the MCE knows where the +/// start of the block is, and can implement getMachineBasicBlockAddress. +void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); +} + +/// getMachineBasicBlockAddress - Return the address of the specified +/// MachineBasicBlock, only usable after the label for the MBB has been +/// emitted. +uintptr_t +ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; +} + +/// getJumpTableEntryAddress - Return the address of the jump table with index +/// 'Index' in the function that last called initJumpTableInfo. +uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { + assert(JTLocations.size() > Index && "JT not emitted!"); + return JTLocations[Index]; +} + +/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { + assert(CPLocations.size() > Index && "CP not emitted!"); + return CPLocations[Index]; +} + +/// getConstantPoolEntrySection - Return the section of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { + assert(CPSections.size() > Index && "CP not emitted!"); + return CPSections[Index]; +} + +} // end namespace llvm + diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp new file mode 100644 index 0000000..48db200 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp @@ -0,0 +1,37 @@ +//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics compatible with +// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. 
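+//
+// Functions select this strategy by name in LLVM IR; illustratively:
+//
+//   define i32 @f() gc "ocaml" { ... }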
+// +// The frametable emitter is in OcamlGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" + +using namespace llvm; + +namespace { + class OcamlGC : public GCStrategy { + public: + OcamlGC(); + }; +} + +static GCRegistry::Add<OcamlGC> +X("ocaml", "ocaml 3.10-compatible GC"); + +void llvm::linkOcamlGC() { } + +OcamlGC::OcamlGC() { + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; +} diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..edb4eea --- /dev/null +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,188 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +INITIALIZE_PASS(OptimizePHIs, "opt-phis", + "Optimize machine instruction PHIs", false, false); + +FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. + bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. 
PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. 
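+    // (Smallest illustrative instance: two PHIs that feed only each other,
+    //
+    //   BB#1: v1 = PHI v0, <BB#0>, v2, <BB#2>
+    //   BB#2: v2 = PHI v1, <BB#1>
+    //
+    // with no other uses of v1 or v2; the cycle is dead and both are erased.)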
+ PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/PBQP/Graph.h b/contrib/llvm/lib/CodeGen/PBQP/Graph.h new file mode 100644 index 0000000..b2224cb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PBQP/Graph.h @@ -0,0 +1,425 @@ +//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PBQP Graph class. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_GRAPH_H +#define LLVM_CODEGEN_PBQP_GRAPH_H + +#include "Math.h" + +#include <list> +#include <vector> +#include <map> + +namespace PBQP { + + /// PBQP Graph class. + /// Instances of this class describe PBQP problems. + class Graph { + private: + + // ----- TYPEDEFS ----- + class NodeEntry; + class EdgeEntry; + + typedef std::list<NodeEntry> NodeList; + typedef std::list<EdgeEntry> EdgeList; + + public: + + typedef NodeList::iterator NodeItr; + typedef NodeList::const_iterator ConstNodeItr; + + typedef EdgeList::iterator EdgeItr; + typedef EdgeList::const_iterator ConstEdgeItr; + + private: + + typedef std::list<EdgeItr> AdjEdgeList; + + public: + + typedef AdjEdgeList::iterator AdjEdgeItr; + + private: + + class NodeEntry { + private: + Vector costs; + AdjEdgeList adjEdges; + unsigned degree; + void *data; + public: + NodeEntry(const Vector &costs) : costs(costs), degree(0) {} + Vector& getCosts() { return costs; } + const Vector& getCosts() const { return costs; } + unsigned getDegree() const { return degree; } + AdjEdgeItr edgesBegin() { return adjEdges.begin(); } + AdjEdgeItr edgesEnd() { return adjEdges.end(); } + AdjEdgeItr addEdge(EdgeItr e) { + ++degree; + return adjEdges.insert(adjEdges.end(), e); + } + void removeEdge(AdjEdgeItr ae) { + --degree; + adjEdges.erase(ae); + } + void setData(void *data) { this->data = data; } + void* getData() { return data; } + }; + + class EdgeEntry { + private: + NodeItr node1, node2; + Matrix costs; + AdjEdgeItr node1AEItr, node2AEItr; + void *data; + public: + EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs) + : node1(node1), node2(node2), costs(costs) {} + NodeItr getNode1() const { return node1; } + NodeItr getNode2() const { return node2; } + Matrix& getCosts() { return costs; } + const Matrix& getCosts() const { return costs; } + void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; } + AdjEdgeItr getNode1AEItr() { return node1AEItr; } + void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; } + AdjEdgeItr getNode2AEItr() { return node2AEItr; } + void setData(void *data) { this->data = data; } + void *getData() { return data; } + }; + + // ----- MEMBERS ----- + + NodeList nodes; + unsigned numNodes; + + EdgeList edges; + unsigned numEdges; + + // ----- INTERNAL METHODS ----- + + NodeEntry& getNode(NodeItr nItr) { return *nItr; } + const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; } + + EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; } + const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; } + + NodeItr 
addConstructedNode(const NodeEntry &n) { + ++numNodes; + return nodes.insert(nodes.end(), n); + } + + EdgeItr addConstructedEdge(const EdgeEntry &e) { + assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() && + "Attempt to add duplicate edge."); + ++numEdges; + EdgeItr edgeItr = edges.insert(edges.end(), e); + EdgeEntry &ne = getEdge(edgeItr); + NodeEntry &n1 = getNode(ne.getNode1()); + NodeEntry &n2 = getNode(ne.getNode2()); + // Sanity check on matrix dimensions: + assert((n1.getCosts().getLength() == ne.getCosts().getRows()) && + (n2.getCosts().getLength() == ne.getCosts().getCols()) && + "Edge cost dimensions do not match node costs dimensions."); + ne.setNode1AEItr(n1.addEdge(edgeItr)); + ne.setNode2AEItr(n2.addEdge(edgeItr)); + return edgeItr; + } + + inline void copyFrom(const Graph &other); + public: + + /// \brief Construct an empty PBQP graph. + Graph() : numNodes(0), numEdges(0) {} + + /// \brief Copy construct this graph from "other". Note: Does not copy node + /// and edge data, only graph structure and costs. + /// @param other Source graph to copy from. + Graph(const Graph &other) : numNodes(0), numEdges(0) { + copyFrom(other); + } + + /// \brief Make this graph a copy of "other". Note: Does not copy node and + /// edge data, only graph structure and costs. + /// @param other The graph to copy from. + /// @return A reference to this graph. + /// + /// This will clear the current graph, erasing any nodes and edges added, + /// before copying from other. + Graph& operator=(const Graph &other) { + clear(); + copyFrom(other); + return *this; + } + + /// \brief Add a node with the given costs. + /// @param costs Cost vector for the new node. + /// @return Node iterator for the added node. + NodeItr addNode(const Vector &costs) { + return addConstructedNode(NodeEntry(costs)); + } + + /// \brief Add an edge between the given nodes with the given costs. + /// @param n1Itr First node. + /// @param n2Itr Second node. + /// @return Edge iterator for the added edge. + EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr, + const Matrix &costs) { + assert(getNodeCosts(n1Itr).getLength() == costs.getRows() && + getNodeCosts(n2Itr).getLength() == costs.getCols() && + "Matrix dimensions mismatch."); + return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); + } + + /// \brief Get the number of nodes in the graph. + /// @return Number of nodes in the graph. + unsigned getNumNodes() const { return numNodes; } + + /// \brief Get the number of edges in the graph. + /// @return Number of edges in the graph. + unsigned getNumEdges() const { return numEdges; } + + /// \brief Get a node's cost vector. + /// @param nItr Node iterator. + /// @return Node cost vector. + Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); } + + /// \brief Get a node's cost vector (const version). + /// @param nItr Node iterator. + /// @return Node cost vector. + const Vector& getNodeCosts(ConstNodeItr nItr) const { + return getNode(nItr).getCosts(); + } + + /// \brief Set a node's data pointer. + /// @param nItr Node iterator. + /// @param data Pointer to node data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. + void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); } + + /// \brief Get the node's data pointer. + /// @param nItr Node iterator. + /// @return Pointer to node data. + void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); } + + /// \brief Get an edge's cost matrix. + /// @param eItr Edge iterator. 
+ /// @return Edge cost matrix. + Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); } + + /// \brief Get an edge's cost matrix (const version). + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + const Matrix& getEdgeCosts(ConstEdgeItr eItr) const { + return getEdge(eItr).getCosts(); + } + + /// \brief Set an edge's data pointer. + /// @param eItr Edge iterator. + /// @param data Pointer to edge data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. + void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); } + + /// \brief Get an edge's data pointer. + /// @param eItr Edge iterator. + /// @return Pointer to edge data. + void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); } + + /// \brief Get a node's degree. + /// @param nItr Node iterator. + /// @return The degree of the node. + unsigned getNodeDegree(NodeItr nItr) const { + return getNode(nItr).getDegree(); + } + + /// \brief Begin iterator for node set. + NodeItr nodesBegin() { return nodes.begin(); } + + /// \brief Begin const iterator for node set. + ConstNodeItr nodesBegin() const { return nodes.begin(); } + + /// \brief End iterator for node set. + NodeItr nodesEnd() { return nodes.end(); } + + /// \brief End const iterator for node set. + ConstNodeItr nodesEnd() const { return nodes.end(); } + + /// \brief Begin iterator for edge set. + EdgeItr edgesBegin() { return edges.begin(); } + + /// \brief End iterator for edge set. + EdgeItr edgesEnd() { return edges.end(); } + + /// \brief Get begin iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return Begin iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesBegin(NodeItr nItr) { + return getNode(nItr).edgesBegin(); + } + + /// \brief Get end iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return End iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesEnd(NodeItr nItr) { + return getNode(nItr).edgesEnd(); + } + + /// \brief Get the first node connected to this edge. + /// @param eItr Edge iterator. + /// @return The first node connected to the given edge. + NodeItr getEdgeNode1(EdgeItr eItr) { + return getEdge(eItr).getNode1(); + } + + /// \brief Get the second node connected to this edge. + /// @param eItr Edge iterator. + /// @return The second node connected to the given edge. + NodeItr getEdgeNode2(EdgeItr eItr) { + return getEdge(eItr).getNode2(); + } + + /// \brief Get the "other" node connected to this edge. + /// @param eItr Edge iterator. + /// @param nItr Node iterator for the "given" node. + /// @return The iterator for the "other" node connected to this edge. + NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) { + EdgeEntry &e = getEdge(eItr); + if (e.getNode1() == nItr) { + return e.getNode2(); + } // else + return e.getNode1(); + } + + /// \brief Get the edge connecting two nodes. + /// @param n1Itr First node iterator. + /// @param n2Itr Second node iterator. + /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists, + /// otherwise returns edgesEnd(). + EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) { + for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr); + aeItr != aeEnd; ++aeItr) { + if ((getEdgeNode1(*aeItr) == n2Itr) || + (getEdgeNode2(*aeItr) == n2Itr)) { + return *aeItr; + } + } + return edges.end(); + } + + /// \brief Remove a node from the graph. + /// @param nItr Node iterator. 
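+    ///
+    /// All edges incident on the node are removed first (see the loop below),
+    /// so iterators referring to those edges, as well as the node iterator
+    /// itself, are invalidated.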
+    void removeNode(NodeItr nItr) {
+      NodeEntry &n = getNode(nItr);
+      for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
+        EdgeItr eItr = *itr;
+        ++itr;
+        removeEdge(eItr);
+      }
+      nodes.erase(nItr);
+      --numNodes;
+    }
+
+    /// \brief Remove an edge from the graph.
+    /// @param eItr Edge iterator.
+    void removeEdge(EdgeItr eItr) {
+      EdgeEntry &e = getEdge(eItr);
+      NodeEntry &n1 = getNode(e.getNode1());
+      NodeEntry &n2 = getNode(e.getNode2());
+      n1.removeEdge(e.getNode1AEItr());
+      n2.removeEdge(e.getNode2AEItr());
+      edges.erase(eItr);
+      --numEdges;
+    }
+
+    /// \brief Remove all nodes and edges from the graph.
+    void clear() {
+      nodes.clear();
+      edges.clear();
+      numNodes = numEdges = 0;
+    }
+
+    /// \brief Print a representation of this graph in DOT format.
+    /// @param os Output stream to print on.
+    template <typename OStream>
+    void printDot(OStream &os) {
+
+      os << "graph {\n";
+
+      for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+           nodeItr != nodeEnd; ++nodeItr) {
+
+        os << "  node" << nodeItr << " [ label=\""
+           << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+      }
+
+      os << "  edge [ len=" << getNumNodes() << " ]\n";
+
+      for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+           edgeItr != edgeEnd; ++edgeItr) {
+
+        os << "  node" << getEdgeNode1(edgeItr)
+           << " -- node" << getEdgeNode2(edgeItr)
+           << " [ label=\"";
+
+        const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+        for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+          os << edgeCosts.getRowAsVector(i) << "\\n";
+        }
+        os << "\" ]\n";
+      }
+      os << "}\n";
+    }
+
+  };
+
+  class NodeItrComparator {
+  public:
+    bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+
+    bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+  };
+
+  class EdgeItrComparator {
+  public:
+    bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+
+    bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+  };
+
+  void Graph::copyFrom(const Graph &other) {
+    std::map<Graph::ConstNodeItr, Graph::NodeItr,
+             NodeItrComparator> nodeMap;
+
+    for (Graph::ConstNodeItr nItr = other.nodesBegin(),
+                             nEnd = other.nodesEnd();
+         nItr != nEnd; ++nItr) {
+      nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
+    }
+
+    // Copy the edges as well, mapping each endpoint through nodeMap so the
+    // new edges connect the corresponding new nodes. Graph provides no const
+    // edge iteration, so cast constness away for this read-only traversal.
+    Graph &otherNC = const_cast<Graph&>(other);
+    for (Graph::EdgeItr eItr = otherNC.edgesBegin(),
+                        eEnd = otherNC.edgesEnd();
+         eItr != eEnd; ++eItr) {
+      addEdge(nodeMap[otherNC.getEdgeNode1(eItr)],
+              nodeMap[otherNC.getEdgeNode2(eItr)],
+              otherNC.getEdgeCosts(eItr));
+    }
+  }
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPH_H
diff --git a/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h b/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h
new file mode 100644
index 0000000..791c227
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h
@@ -0,0 +1,246 @@
+//===-- HeuristicBase.h -- Heuristic base class for PBQP --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+
+#include "HeuristicSolver.h"
+
+namespace PBQP {
+
+  /// \brief Abstract base class for heuristic implementations.
+  ///
+  /// This class provides a handy base for heuristic implementations with
+  /// common solver behaviour implemented for a number of methods.
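+  ///
+  /// A minimal derived heuristic might look like this (an illustrative
+  /// sketch only; MyHeuristic and its empty data structs are hypothetical):
+  /// \code
+  ///   class MyHeuristic : public HeuristicBase<MyHeuristic> {
+  ///   public:
+  ///     struct NodeData {};
+  ///     struct EdgeData {};
+  ///     MyHeuristic(HeuristicSolverImpl<MyHeuristic> &solver)
+  ///       : HeuristicBase<MyHeuristic>(solver) {}
+  ///     // ...plus the methods listed below.
+  ///   };
+  /// \endcode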
+  ///
+  /// To implement your own heuristic using this class as a base you'll have to
+  /// implement, as a minimum, the following methods:
+  /// <ul>
+  ///   <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the
+  ///        heuristic reduction list.
+  ///   <li> bool heuristicReduce() : Perform a single heuristic reduction,
+  ///        returning true if one took place.
+  ///   <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
+  ///        change to the cost matrix on the given edge (by R2).
+  ///   <li> void postUpdateEdgeCosts(Graph::EdgeItr) : Handle the new
+  ///        costs on the given edge.
+  ///   <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
+  ///        edge into the PBQP graph (by R2).
+  ///   <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
+  ///        disconnection of the given edge from the given node.
+  ///   <li> A constructor for your derived class : to pass back a reference to
+  ///        the solver which is using this heuristic.
+  /// </ul>
+  ///
+  /// These methods are implemented in this class for documentation purposes,
+  /// but will assert if called.
+  ///
+  /// Note that this class uses the curiously recurring template idiom to
+  /// forward calls to the derived class. These methods need not be made
+  /// virtual, and indeed probably shouldn't be, for performance reasons.
+  ///
+  /// You'll also need to provide NodeData and EdgeData structs in your class.
+  /// These can be used to attach data relevant to your heuristic to each
+  /// node/edge in the PBQP graph.
+
+  template <typename HImpl>
+  class HeuristicBase {
+  private:
+
+    typedef std::list<Graph::NodeItr> OptimalList;
+
+    HeuristicSolverImpl<HImpl> &s;
+    Graph &g;
+    OptimalList optimalList;
+
+    // Return a reference to the derived heuristic.
+    HImpl& impl() { return static_cast<HImpl&>(*this); }
+
+    // Add the given node to the optimal reductions list. Keep an iterator to
+    // its location for fast removal.
+    void addToOptimalReductionList(Graph::NodeItr nItr) {
+      optimalList.insert(optimalList.end(), nItr);
+    }
+
+  public:
+
+    /// \brief Construct an instance with a reference to the given solver.
+    /// @param solver The solver which is using this heuristic instance.
+    HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
+      : s(solver), g(s.getGraph()) { }
+
+    /// \brief Get the solver which is using this heuristic instance.
+    /// @return The solver which is using this heuristic instance.
+    ///
+    /// You can use this method to get access to the solver in your derived
+    /// heuristic implementation.
+    HeuristicSolverImpl<HImpl>& getSolver() { return s; }
+
+    /// \brief Get the graph representing the problem to be solved.
+    /// @return The graph representing the problem to be solved.
+    Graph& getGraph() { return g; }
+
+    /// \brief Tell the solver to simplify the graph before the reduction phase.
+    /// @return Whether or not the solver should run a simplification phase
+    ///         prior to the main setup and reduction.
+    ///
+    /// HeuristicBase returns true from this method as it's a sensible default;
+    /// however, you can over-ride it in your derived class if you want
+    /// different behaviour.
+    bool solverRunSimplify() const { return true; }
+
+    /// \brief Decide whether a node should be optimally or heuristically
+    ///        reduced.
+    /// @return Whether or not the given node should be listed for optimal
+    ///         reduction (via R0, R1 or R2).
+    ///
+    /// HeuristicBase returns true for any node with degree less than 3. This
+    /// is sane and sensible for many situations, but not all.
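+    /// (Only nodes of degree 0, 1 or 2 have optimal reduction rules, R0, R1
+    /// and R2 respectively, so anything of higher degree must be reduced
+    /// heuristically.)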
+    /// You can over-ride this method in your derived class if you want a
+    /// different selection criterion. Note however that your criterion for
+    /// selecting optimal nodes should be <i>at least</i> as strong as this,
+    /// i.e. nodes of degree 3 or higher should not be selected under any
+    /// circumstances.
+    bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+      if (g.getNodeDegree(nItr) < 3)
+        return true;
+      // else
+      return false;
+    }
+
+    /// \brief Add the given node to the list of nodes to be optimally reduced.
+    /// @param nItr Node iterator to be added.
+    ///
+    /// You probably don't want to over-ride this, except perhaps to record
+    /// statistics before calling this implementation. HeuristicBase relies on
+    /// its behaviour.
+    void addToOptimalReduceList(Graph::NodeItr nItr) {
+      optimalList.push_back(nItr);
+    }
+
+    /// \brief Initialise the heuristic.
+    ///
+    /// HeuristicBase iterates over all nodes in the problem and adds them to
+    /// the appropriate list using addToOptimalReduceList or
+    /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
+    ///
+    /// This behaviour should be fine for most situations.
+    void setup() {
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        if (impl().shouldOptimallyReduce(nItr)) {
+          addToOptimalReduceList(nItr);
+        } else {
+          impl().addToHeuristicReduceList(nItr);
+        }
+      }
+    }
+
+    /// \brief Optimally reduce one of the nodes in the optimal reduce list.
+    /// @return True if a reduction takes place, false if the optimal reduce
+    ///         list is empty.
+    ///
+    /// Selects a node from the optimal reduce list and removes it, applying
+    /// R0, R1 or R2 as appropriate based on the selected node's degree.
+    bool optimalReduce() {
+      if (optimalList.empty())
+        return false;
+
+      Graph::NodeItr nItr = optimalList.front();
+      optimalList.pop_front();
+
+      switch (s.getSolverDegree(nItr)) {
+        case 0: s.applyR0(nItr); break;
+        case 1: s.applyR1(nItr); break;
+        case 2: s.applyR2(nItr); break;
+        default: assert(false &&
+                        "Optimal reduction of nodes with degree > 2 is invalid.");
+      }
+
+      return true;
+    }
+
+    /// \brief Perform the PBQP reduction process.
+    ///
+    /// Reduces the problem to the empty graph by repeated application of the
+    /// reduction rules R0, R1, R2 and RN.
+    /// R0, R1 or R2 are always applied if possible before RN is used.
+    void reduce() {
+      bool finished = false;
+
+      while (!finished) {
+        if (!optimalReduce()) {
+          if (impl().heuristicReduce()) {
+            getSolver().recordRN();
+          } else {
+            finished = true;
+          }
+        }
+      }
+    }
+
+    /// \brief Add a node to the heuristic reduce list.
+    /// @param nItr Node iterator to add to the heuristic reduce list.
+    void addToHeuristicList(Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Heuristically reduce one of the nodes in the heuristic
+    ///        reduce list.
+    /// @return True if a reduction takes place, false if the heuristic reduce
+    ///         list is empty.
+    bool heuristicReduce() {
+      assert(false && "Must be implemented in derived class.");
+      return false;
+    }
+
+    /// \brief Prepare a change in the costs on the given edge.
+    /// @param eItr Edge iterator.
+    void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the change in the costs on the given edge.
+    /// @param eItr Edge iterator.
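+    ///
+    /// The solver calls preUpdateEdgeCosts before it modifies an edge's cost
+    /// matrix and this method afterwards, once the new costs are in place
+    /// (see the R2 rule in HeuristicSolverImpl::applyR2).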
+    void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the addition of a new edge into the PBQP graph.
+    /// @param eItr Edge iterator for the added edge.
+    void handleAddEdge(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle disconnection of an edge from a node.
+    /// @param eItr Edge iterator for edge being disconnected.
+    /// @param nItr Node iterator for the node being disconnected from.
+    ///
+    /// Edges are frequently removed due to the removal of a node. This
+    /// method allows for the effect to be computed only for the remaining
+    /// node in the graph.
+    void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Clean up any structures used by HeuristicBase.
+    ///
+    /// At present this just performs a sanity check: that the optimal reduce
+    /// list is empty now that reduction has completed.
+    ///
+    /// If your derived class has more complex structures which need tearing
+    /// down you should over-ride this method but include a call back to this
+    /// implementation.
+    void cleanup() {
+      assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
+    }
+
+  };
+
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h b/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h
new file mode 100644
index 0000000..35514f9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -0,0 +1,616 @@
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Heuristic PBQP solver. This solver is able to perform optimal reductions for
+// nodes of degree 0, 1 or 2. For nodes of degree >2 a pluggable heuristic is
+// used to select a node for reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+
+#include "Graph.h"
+#include "Solution.h"
+#include <vector>
+#include <limits>
+
+namespace PBQP {
+
+  /// \brief Heuristic PBQP solver implementation.
+  ///
+  /// This class should usually be created (and destroyed) indirectly via a
+  /// call to HeuristicSolver<HImpl>::solve(Graph&).
+  /// See the comments for HeuristicSolver.
+  ///
+  /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules, the
+  /// backpropagation phase, and maintains the internal copy of the graph on
+  /// which the reduction is carried out (the original being kept to facilitate
+  /// backpropagation).
+  template <typename HImpl>
+  class HeuristicSolverImpl {
+  private:
+
+    typedef typename HImpl::NodeData HeuristicNodeData;
+    typedef typename HImpl::EdgeData HeuristicEdgeData;
+
+    typedef std::list<Graph::EdgeItr> SolverEdges;
+
+  public:
+
+    /// \brief Iterator type for edges in the solver graph.
+    typedef SolverEdges::iterator SolverEdgeItr;
+
+  private:
+
+    class NodeData {
+    public:
+      NodeData() : solverDegree(0) {}
+
+      HeuristicNodeData& getHeuristicData() { return hData; }
+
+      SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
+        ++solverDegree;
+        return solverEdges.insert(solverEdges.end(), eItr);
+      }
+
+      void removeSolverEdge(SolverEdgeItr seItr) {
+        --solverDegree;
+        solverEdges.erase(seItr);
+      }
+
+      SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
+      SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
+      unsigned getSolverDegree() const { return solverDegree; }
+      void clearSolverEdges() {
+        solverDegree = 0;
+        solverEdges.clear();
+      }
+
+    private:
+      HeuristicNodeData hData;
+      unsigned solverDegree;
+      SolverEdges solverEdges;
+    };
+
+    class EdgeData {
+    public:
+      HeuristicEdgeData& getHeuristicData() { return hData; }
+
+      void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
+        this->n1SolverEdgeItr = n1SolverEdgeItr;
+      }
+
+      SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
+
+      void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr) {
+        this->n2SolverEdgeItr = n2SolverEdgeItr;
+      }
+
+      SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
+
+    private:
+
+      HeuristicEdgeData hData;
+      SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
+    };
+
+    Graph &g;
+    HImpl h;
+    Solution s;
+    std::vector<Graph::NodeItr> stack;
+
+    typedef std::list<NodeData> NodeDataList;
+    NodeDataList nodeDataList;
+
+    typedef std::list<EdgeData> EdgeDataList;
+    EdgeDataList edgeDataList;
+
+  public:
+
+    /// \brief Construct a heuristic solver implementation to solve the given
+    ///        graph.
+    /// @param g The graph representing the problem instance to be solved.
+    HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
+
+    /// \brief Get the graph being solved by this solver.
+    /// @return The graph representing the problem instance being solved by
+    ///         this solver.
+    Graph& getGraph() { return g; }
+
+    /// \brief Get the heuristic data attached to the given node.
+    /// @param nItr Node iterator.
+    /// @return The heuristic data attached to the given node.
+    HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).getHeuristicData();
+    }
+
+    /// \brief Get the heuristic data attached to the given edge.
+    /// @param eItr Edge iterator.
+    /// @return The heuristic data attached to the given edge.
+    HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+      return getSolverEdgeData(eItr).getHeuristicData();
+    }
+
+    /// \brief Begin iterator for the set of edges adjacent to the given node
+    ///        in the solver graph.
+    /// @param nItr Node iterator.
+    /// @return Begin iterator for the set of edges adjacent to the given node
+    ///         in the solver graph.
+    SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesBegin();
+    }
+
+    /// \brief End iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return End iterator for the set of edges adjacent to the given node in
+    ///         the solver graph.
+    SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesEnd();
+    }
+
+    /// \brief Remove an edge from the solver graph.
+    /// @param eItr Edge iterator for edge to be removed.
+    ///
+    /// Does <i>not</i> notify the heuristic of the removal. That should be
+    /// done manually if necessary.
+ void removeSolverEdge(Graph::EdgeItr eItr) { + EdgeData &eData = getSolverEdgeData(eItr); + NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), + &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); + + n1Data.removeSolverEdge(eData.getN1SolverEdgeItr()); + n2Data.removeSolverEdge(eData.getN2SolverEdgeItr()); + } + + /// \brief Compute a solution to the PBQP problem instance with which this + /// heuristic solver was constructed. + /// @return A solution to the PBQP problem. + /// + /// Performs the full PBQP heuristic solver algorithm, including setup, + /// calls to the heuristic (which will call back to the reduction rules in + /// this class), and cleanup. + Solution computeSolution() { + setup(); + h.setup(); + h.reduce(); + backpropagate(); + h.cleanup(); + cleanup(); + return s; + } + + /// \brief Add to the end of the stack. + /// @param nItr Node iterator to add to the reduction stack. + void pushToStack(Graph::NodeItr nItr) { + getSolverNodeData(nItr).clearSolverEdges(); + stack.push_back(nItr); + } + + /// \brief Returns the solver degree of the given node. + /// @param nItr Node iterator for which degree is requested. + /// @return Node degree in the <i>solver</i> graph (not the original graph). + unsigned getSolverDegree(Graph::NodeItr nItr) { + return getSolverNodeData(nItr).getSolverDegree(); + } + + /// \brief Set the solution of the given node. + /// @param nItr Node iterator to set solution for. + /// @param selection Selection for node. + void setSolution(const Graph::NodeItr &nItr, unsigned selection) { + s.setSelection(nItr, selection); + + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + Graph::EdgeItr eItr(*aeItr); + Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr)); + getSolverNodeData(anItr).addSolverEdge(eItr); + } + } + + /// \brief Apply rule R0. + /// @param nItr Node iterator for node to apply R0 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR0(Graph::NodeItr nItr) { + assert(getSolverNodeData(nItr).getSolverDegree() == 0 && + "R0 applied to node with degree != 0."); + + // Nothing to do. Just push the node onto the reduction stack. + pushToStack(nItr); + + s.recordR0(); + } + + /// \brief Apply rule R1. + /// @param xnItr Node iterator for node to apply R1 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR1(Graph::NodeItr xnItr) { + NodeData &nd = getSolverNodeData(xnItr); + assert(nd.getSolverDegree() == 1 && + "R1 applied to node with degree != 1."); + + Graph::EdgeItr eItr = *nd.solverEdgesBegin(); + + const Matrix &eCosts = g.getEdgeCosts(eItr); + const Vector &xCosts = g.getNodeCosts(xnItr); + + // Duplicate a little to avoid transposing matrices. 
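+      // In both branches below the surviving neighbour y absorbs x's costs:
+      //   yCosts[j] += min over i of (eCosts[i][j] + xCosts[i])
+      // with rows and columns swapped when x is the edge's second node.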
+ if (xnItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr ynItr = g.getEdgeNode2(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned j = 0; j < yCosts.getLength(); ++j) { + PBQPNum min = eCosts[0][j] + xCosts[0]; + for (unsigned i = 1; i < xCosts.getLength(); ++i) { + PBQPNum c = eCosts[i][j] + xCosts[i]; + if (c < min) + min = c; + } + yCosts[j] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } else { + Graph::NodeItr ynItr = g.getEdgeNode1(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned i = 0; i < yCosts.getLength(); ++i) { + PBQPNum min = eCosts[i][0] + xCosts[0]; + for (unsigned j = 1; j < xCosts.getLength(); ++j) { + PBQPNum c = eCosts[i][j] + xCosts[j]; + if (c < min) + min = c; + } + yCosts[i] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } + removeSolverEdge(eItr); + assert(nd.getSolverDegree() == 0 && + "Degree 1 with edge removed should be 0."); + pushToStack(xnItr); + s.recordR1(); + } + + /// \brief Apply rule R2. + /// @param xnItr Node iterator for node to apply R2 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR2(Graph::NodeItr xnItr) { + assert(getSolverNodeData(xnItr).getSolverDegree() == 2 && + "R2 applied to node with degree != 2."); + + NodeData &nd = getSolverNodeData(xnItr); + const Vector &xCosts = g.getNodeCosts(xnItr); + + SolverEdgeItr aeItr = nd.solverEdgesBegin(); + Graph::EdgeItr yxeItr = *aeItr, + zxeItr = *(++aeItr); + + Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr), + znItr = g.getEdgeOtherNode(zxeItr, xnItr); + + bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr), + flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr); + + const Matrix *yxeCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxeItr).transpose()) : + &g.getEdgeCosts(yxeItr); + + const Matrix *zxeCosts = flipEdge2 ? + new Matrix(g.getEdgeCosts(zxeItr).transpose()) : + &g.getEdgeCosts(zxeItr); + + unsigned xLen = xCosts.getLength(), + yLen = yxeCosts->getRows(), + zLen = zxeCosts->getRows(); + + Matrix delta(yLen, zLen); + + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } + } + delta[i][j] = min; + } + } + + if (flipEdge1) + delete yxeCosts; + + if (flipEdge2) + delete zxeCosts; + + Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr); + bool addedEdge = false; + + if (yzeItr == g.edgesEnd()) { + yzeItr = g.addEdge(ynItr, znItr, delta); + addedEdge = true; + } else { + Matrix &yzeCosts = g.getEdgeCosts(yzeItr); + h.preUpdateEdgeCosts(yzeItr); + if (ynItr == g.getEdgeNode1(yzeItr)) { + yzeCosts += delta; + } else { + yzeCosts += delta.transpose(); + } + } + + bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr); + + if (!addedEdge) { + // If we modified the edge costs let the heuristic know. + h.postUpdateEdgeCosts(yzeItr); + } + + if (nullCostEdge) { + // If this edge ended up null remove it. + if (!addedEdge) { + // We didn't just add it, so we need to notify the heuristic + // and remove it from the solver. + h.handleRemoveEdge(yzeItr, ynItr); + h.handleRemoveEdge(yzeItr, znItr); + removeSolverEdge(yzeItr); + } + g.removeEdge(yzeItr); + } else if (addedEdge) { + // If the edge was added, and non-null, finish setting it up, add it to + // the solver & notify heuristic. 
+ edgeDataList.push_back(EdgeData()); + g.setEdgeData(yzeItr, &edgeDataList.back()); + addSolverEdge(yzeItr); + h.handleAddEdge(yzeItr); + } + + h.handleRemoveEdge(yxeItr, ynItr); + removeSolverEdge(yxeItr); + h.handleRemoveEdge(zxeItr, znItr); + removeSolverEdge(zxeItr); + + pushToStack(xnItr); + s.recordR2(); + } + + /// \brief Record an application of the RN rule. + /// + /// For use by the HeuristicBase. + void recordRN() { s.recordRN(); } + + private: + + NodeData& getSolverNodeData(Graph::NodeItr nItr) { + return *static_cast<NodeData*>(g.getNodeData(nItr)); + } + + EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) { + return *static_cast<EdgeData*>(g.getEdgeData(eItr)); + } + + void addSolverEdge(Graph::EdgeItr eItr) { + EdgeData &eData = getSolverEdgeData(eItr); + NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), + &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); + + eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr)); + eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr)); + } + + void setup() { + if (h.solverRunSimplify()) { + simplify(); + } + + // Create node data objects. + for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); + nItr != nEnd; ++nItr) { + nodeDataList.push_back(NodeData()); + g.setNodeData(nItr, &nodeDataList.back()); + } + + // Create edge data objects. + for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); + eItr != eEnd; ++eItr) { + edgeDataList.push_back(EdgeData()); + g.setEdgeData(eItr, &edgeDataList.back()); + addSolverEdge(eItr); + } + } + + void simplify() { + disconnectTrivialNodes(); + eliminateIndependentEdges(); + } + + // Eliminate trivial nodes. + void disconnectTrivialNodes() { + unsigned numDisconnected = 0; + + for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); + nItr != nEnd; ++nItr) { + + if (g.getNodeCosts(nItr).getLength() == 1) { + + std::vector<Graph::EdgeItr> edgesToRemove; + + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + + Graph::EdgeItr eItr = *aeItr; + + if (g.getEdgeNode1(eItr) == nItr) { + Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(eItr).getRowAsVector(0); + } + else { + Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(eItr).getColAsVector(0); + } + + edgesToRemove.push_back(eItr); + } + + if (!edgesToRemove.empty()) + ++numDisconnected; + + while (!edgesToRemove.empty()) { + g.removeEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + } + } + + void eliminateIndependentEdges() { + std::vector<Graph::EdgeItr> edgesToProcess; + unsigned numEliminated = 0; + + for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); + eItr != eEnd; ++eItr) { + edgesToProcess.push_back(eItr); + } + + while (!edgesToProcess.empty()) { + if (tryToEliminateEdge(edgesToProcess.back())) + ++numEliminated; + edgesToProcess.pop_back(); + } + } + + bool tryToEliminateEdge(Graph::EdgeItr eItr) { + if (tryNormaliseEdgeMatrix(eItr)) { + g.removeEdge(eItr); + return true; + } + return false; + } + + bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { + + const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); + + Matrix &edgeCosts = g.getEdgeCosts(eItr); + Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), + &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + PBQPNum rowMin = infinity; + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + if 
(vCosts[c] != infinity && edgeCosts[r][c] < rowMin) + rowMin = edgeCosts[r][c]; + } + + uCosts[r] += rowMin; + + if (rowMin != infinity) { + edgeCosts.subFromRow(r, rowMin); + } + else { + edgeCosts.setRow(r, 0); + } + } + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + PBQPNum colMin = infinity; + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) + colMin = edgeCosts[r][c]; + } + + vCosts[c] += colMin; + + if (colMin != infinity) { + edgeCosts.subFromCol(c, colMin); + } + else { + edgeCosts.setCol(c, 0); + } + } + + return edgeCosts.isZero(); + } + + void backpropagate() { + while (!stack.empty()) { + computeSolution(stack.back()); + stack.pop_back(); + } + } + + void computeSolution(Graph::NodeItr nItr) { + + NodeData &nodeData = getSolverNodeData(nItr); + + Vector v(g.getNodeCosts(nItr)); + + // Solve based on existing solved edges. + for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(), + solvedEdgeEnd = nodeData.solverEdgesEnd(); + solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) { + + Graph::EdgeItr eItr(*solvedEdgeItr); + Matrix &edgeCosts = g.getEdgeCosts(eItr); + + if (nItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr adjNode(g.getEdgeNode2(eItr)); + unsigned adjSolution = s.getSelection(adjNode); + v += edgeCosts.getColAsVector(adjSolution); + } + else { + Graph::NodeItr adjNode(g.getEdgeNode1(eItr)); + unsigned adjSolution = s.getSelection(adjNode); + v += edgeCosts.getRowAsVector(adjSolution); + } + + } + + setSolution(nItr, v.minIndex()); + } + + void cleanup() { + h.cleanup(); + nodeDataList.clear(); + edgeDataList.clear(); + } + }; + + /// \brief PBQP heuristic solver class. + /// + /// Given a PBQP Graph g representing a PBQP problem, you can find a solution + /// by calling + /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt> + /// + /// The choice of heuristic for the H parameter will affect both the solver + /// speed and solution quality. The heuristic should be chosen based on the + /// nature of the problem being solved. + /// Currently the only solver included with LLVM is the Briggs heuristic for + /// register allocation. + template <typename HImpl> + class HeuristicSolver { + public: + static Solution solve(Graph &g) { + HeuristicSolverImpl<HImpl> hs(g); + return hs.computeSolution(); + } + }; + +} + +#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H diff --git a/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h b/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h new file mode 100644 index 0000000..18eaf7c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -0,0 +1,460 @@ +//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the Briggs test for "allocability" of nodes in a +// PBQP graph representing a register allocation problem. Nodes which can be +// proven allocable (by a safe and relatively accurate test) are removed from +// the PBQP graph first. If no provably allocable node is present in the graph +// then the node with the minimal spill-cost to degree ratio is removed. 
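+//
+// In the terms used below, a node is allocable when its neighbours can deny
+// it fewer registers than it has available, or when it retains at least one
+// "safe" register that no neighbour can take (see updateAllocability).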
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H +#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H + +#include "../HeuristicSolver.h" +#include "../HeuristicBase.h" + +#include <set> +#include <limits> + +namespace PBQP { + namespace Heuristics { + + /// \brief PBQP Heuristic which applies an allocability test based on + /// Briggs. + /// + /// This heuristic assumes that the elements of cost vectors in the PBQP + /// problem represent storage options, with the first being the spill + /// option and subsequent elements representing legal registers for the + /// corresponding node. Edge cost matrices are likewise assumed to represent + /// register constraints. + /// If one or more nodes can be proven allocable by this heuristic (by + /// inspection of their constraint matrices) then the allocable node of + /// highest degree is selected for the next reduction and pushed to the + /// solver stack. If no nodes can be proven allocable then the node with + /// the lowest estimated spill cost is selected and push to the solver stack + /// instead. + /// + /// This implementation is built on top of HeuristicBase. + class Briggs : public HeuristicBase<Briggs> { + private: + + class LinkDegreeComparator { + public: + LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {} + bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { + if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) + return true; + return false; + } + private: + HeuristicSolverImpl<Briggs> *s; + }; + + class SpillCostComparator { + public: + SpillCostComparator(HeuristicSolverImpl<Briggs> &s) + : s(&s), g(&s.getGraph()) {} + bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { + PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr), + cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); + if (cost1 < cost2) + return true; + return false; + } + + private: + HeuristicSolverImpl<Briggs> *s; + Graph *g; + }; + + typedef std::list<Graph::NodeItr> RNAllocableList; + typedef RNAllocableList::iterator RNAllocableListItr; + + typedef std::list<Graph::NodeItr> RNUnallocableList; + typedef RNUnallocableList::iterator RNUnallocableListItr; + + public: + + struct NodeData { + typedef std::vector<unsigned> UnsafeDegreesArray; + bool isHeuristic, isAllocable, isInitialized; + unsigned numDenied, numSafe; + UnsafeDegreesArray unsafeDegrees; + RNAllocableListItr rnaItr; + RNUnallocableListItr rnuItr; + + NodeData() + : isHeuristic(false), isAllocable(false), isInitialized(false), + numDenied(0), numSafe(0) { } + }; + + struct EdgeData { + typedef std::vector<unsigned> UnsafeArray; + unsigned worst, reverseWorst; + UnsafeArray unsafe, reverseUnsafe; + bool isUpToDate; + + EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {} + }; + + /// \brief Construct an instance of the Briggs heuristic. + /// @param solver A reference to the solver which is using this heuristic. + Briggs(HeuristicSolverImpl<Briggs> &solver) : + HeuristicBase<Briggs>(solver) {} + + /// \brief Determine whether a node should be reduced using optimal + /// reduction. + /// @param nItr Node iterator to be considered. + /// @return True if the given node should be optimally reduced, false + /// otherwise. + /// + /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one + /// exception. Nodes whose spill cost (element 0 of their cost vector) is + /// infinite are checked for allocability first. 
Allocable nodes may be + /// optimally reduced, but nodes whose allocability cannot be proven are + /// selected for heuristic reduction instead. + bool shouldOptimallyReduce(Graph::NodeItr nItr) { + if (getSolver().getSolverDegree(nItr) < 3) { + return true; + } + // else + return false; + } + + /// \brief Add a node to the heuristic reduce list. + /// @param nItr Node iterator to add to the heuristic reduce list. + void addToHeuristicReduceList(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + initializeNode(nItr); + nd.isHeuristic = true; + if (nd.isAllocable) { + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } else { + nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr); + } + } + + /// \brief Heuristically reduce one of the nodes in the heuristic + /// reduce list. + /// @return True if a reduction takes place, false if the heuristic reduce + /// list is empty. + /// + /// If the list of allocable nodes is non-empty a node is selected + /// from it and pushed to the stack. Otherwise if the non-allocable list + /// is non-empty a node is selected from it and pushed to the stack. + /// If both lists are empty the method simply returns false with no action + /// taken. + bool heuristicReduce() { + if (!rnAllocableList.empty()) { + RNAllocableListItr rnaItr = + min_element(rnAllocableList.begin(), rnAllocableList.end(), + LinkDegreeComparator(getSolver())); + Graph::NodeItr nItr = *rnaItr; + rnAllocableList.erase(rnaItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; + } else if (!rnUnallocableList.empty()) { + RNUnallocableListItr rnuItr = + min_element(rnUnallocableList.begin(), rnUnallocableList.end(), + SpillCostComparator(getSolver())); + Graph::NodeItr nItr = *rnuItr; + rnUnallocableList.erase(rnuItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; + } + // else + return false; + } + + /// \brief Prepare a change in the costs on the given edge. + /// @param eItr Edge iterator. + void preUpdateEdgeCosts(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + if (n1.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr)); + if (n2.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr)); + + EdgeData &ed = getHeuristicEdgeData(eItr); + ed.isUpToDate = false; + } + + /// \brief Handle the change in the costs on the given edge. + /// @param eItr Edge iterator. + void postUpdateEdgeCosts(Graph::EdgeItr eItr) { + // This is effectively the same as adding a new edge now, since + // we've factored out the costs of the old one. + handleAddEdge(eItr); + } + + /// \brief Handle the addition of a new edge into the PBQP graph. + /// @param eItr Edge iterator for the added edge. + /// + /// Updates allocability of any nodes connected by this edge which are + /// being managed by the heuristic. If allocability changes they are + /// moved to the appropriate list. + void handleAddEdge(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + // If neither node is managed by the heuristic there's nothing to be + // done. + if (!n1.isHeuristic && !n2.isHeuristic) + return; + + // Ok - we need to update at least one node. 
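+      // Refresh this edge's cached denied/safe contributions, then fold them
+      // into each heuristic-managed endpoint and re-check whether that node
+      // is still allocable, moving it between lists if not.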
+ computeEdgeContributions(eItr); + + // Update node 1 if it's managed by the heuristic. + if (n1.isHeuristic) { + bool n1WasAllocable = n1.isAllocable; + addEdgeContributions(eItr, n1Itr); + updateAllocability(n1Itr); + if (n1WasAllocable && !n1.isAllocable) { + rnAllocableList.erase(n1.rnaItr); + n1.rnuItr = + rnUnallocableList.insert(rnUnallocableList.end(), n1Itr); + } + } + + // Likewise for node 2. + if (n2.isHeuristic) { + bool n2WasAllocable = n2.isAllocable; + addEdgeContributions(eItr, n2Itr); + updateAllocability(n2Itr); + if (n2WasAllocable && !n2.isAllocable) { + rnAllocableList.erase(n2.rnaItr); + n2.rnuItr = + rnUnallocableList.insert(rnUnallocableList.end(), n2Itr); + } + } + } + + /// \brief Handle disconnection of an edge from a node. + /// @param eItr Edge iterator for edge being disconnected. + /// @param nItr Node iterator for the node being disconnected from. + /// + /// Updates allocability of the given node and, if appropriate, moves the + /// node to a new list. + void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + + // If the node is not managed by the heuristic there's nothing to be + // done. + if (!nd.isHeuristic) + return; + + EdgeData &ed = getHeuristicEdgeData(eItr); + (void)ed; + assert(ed.isUpToDate && "Edge data is not up to date."); + + // Update node. + bool ndWasAllocable = nd.isAllocable; + subtractEdgeContributions(eItr, nItr); + updateAllocability(nItr); + + // If the node has gone optimal... + if (shouldOptimallyReduce(nItr)) { + nd.isHeuristic = false; + addToOptimalReduceList(nItr); + if (ndWasAllocable) { + rnAllocableList.erase(nd.rnaItr); + } else { + rnUnallocableList.erase(nd.rnuItr); + } + } else { + // Node didn't go optimal, but we might have to move it + // from "unallocable" to "allocable". + if (!ndWasAllocable && nd.isAllocable) { + rnUnallocableList.erase(nd.rnuItr); + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } + } + } + + private: + + NodeData& getHeuristicNodeData(Graph::NodeItr nItr) { + return getSolver().getHeuristicNodeData(nItr); + } + + EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { + return getSolver().getHeuristicEdgeData(eItr); + } + + // Work out what this edge will contribute to the allocability of the + // nodes connected to it. + void computeEdgeContributions(Graph::EdgeItr eItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + if (ed.isUpToDate) + return; // Edge data is already up to date. + + Matrix &eCosts = getGraph().getEdgeCosts(eItr); + + unsigned numRegs = eCosts.getRows() - 1, + numReverseRegs = eCosts.getCols() - 1; + + std::vector<unsigned> rowInfCounts(numRegs, 0), + colInfCounts(numReverseRegs, 0); + + ed.worst = 0; + ed.reverseWorst = 0; + ed.unsafe.clear(); + ed.unsafe.resize(numRegs, 0); + ed.reverseUnsafe.clear(); + ed.reverseUnsafe.resize(numReverseRegs, 0); + + for (unsigned i = 0; i < numRegs; ++i) { + for (unsigned j = 0; j < numReverseRegs; ++j) { + if (eCosts[i + 1][j + 1] == + std::numeric_limits<PBQPNum>::infinity()) { + ed.unsafe[i] = 1; + ed.reverseUnsafe[j] = 1; + ++rowInfCounts[i]; + ++colInfCounts[j]; + + if (colInfCounts[j] > ed.worst) { + ed.worst = colInfCounts[j]; + } + + if (rowInfCounts[i] > ed.reverseWorst) { + ed.reverseWorst = rowInfCounts[i]; + } + } + } + } + + ed.isUpToDate = true; + } + + // Add the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. 
Once updated these numbers can be used by clients + // to update the node's allocability. + void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r]==0) { + --nd.numSafe; + } + ++nd.unsafeDegrees[r]; + } + } + } + + // Subtract the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. Once updated these numbers can be used by clients + // to update the node's allocability. + void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r] == 1) { + ++nd.numSafe; + } + --nd.unsafeDegrees[r]; + } + } + } + + void updateAllocability(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0; + } + + void initializeNode(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + + if (nd.isInitialized) + return; // Node data is already up to date. 
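+
+      // First-time initialisation: assume every register is safe (numSafe ==
+      // numRegs, nothing denied yet), then fold in the contribution of each
+      // incident solver edge below.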
+ + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + nd.numDenied = 0; + nd.numSafe = numRegs; + nd.unsafeDegrees.resize(numRegs, 0); + + typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; + + for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr), + aeEnd = getSolver().solverEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + + Graph::EdgeItr eItr = *aeItr; + computeEdgeContributions(eItr); + addEdgeContributions(eItr, nItr); + } + + updateAllocability(nItr); + nd.isInitialized = true; + } + + void handleRemoveNode(Graph::NodeItr xnItr) { + typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; + std::vector<Graph::EdgeItr> edgesToRemove; + for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr), + aeEnd = getSolver().solverEdgesEnd(xnItr); + aeItr != aeEnd; ++aeItr) { + Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr); + handleRemoveEdge(*aeItr, ynItr); + edgesToRemove.push_back(*aeItr); + } + while (!edgesToRemove.empty()) { + getSolver().removeSolverEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + + RNAllocableList rnAllocableList; + RNUnallocableList rnUnallocableList; + }; + + } +} + + +#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H diff --git a/contrib/llvm/lib/CodeGen/PBQP/Math.h b/contrib/llvm/lib/CodeGen/PBQP/Math.h new file mode 100644 index 0000000..e7598bf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PBQP/Math.h @@ -0,0 +1,288 @@ +//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_MATH_H +#define LLVM_CODEGEN_PBQP_MATH_H + +#include <cassert> +#include <algorithm> +#include <functional> + +namespace PBQP { + +typedef float PBQPNum; + +/// \brief PBQP Vector class. +class Vector { + public: + + /// \brief Construct a PBQP vector of the given size. + explicit Vector(unsigned length) : + length(length), data(new PBQPNum[length]) { + } + + /// \brief Construct a PBQP vector with initializer. + Vector(unsigned length, PBQPNum initVal) : + length(length), data(new PBQPNum[length]) { + std::fill(data, data + length, initVal); + } + + /// \brief Copy construct a PBQP vector. + Vector(const Vector &v) : + length(v.length), data(new PBQPNum[length]) { + std::copy(v.data, v.data + length, data); + } + + /// \brief Destroy this vector, return its memory. + ~Vector() { delete[] data; } + + /// \brief Assignment operator. + Vector& operator=(const Vector &v) { + delete[] data; + length = v.length; + data = new PBQPNum[length]; + std::copy(v.data, v.data + length, data); + return *this; + } + + /// \brief Return the length of the vector + unsigned getLength() const { + return length; + } + + /// \brief Element access. + PBQPNum& operator[](unsigned index) { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Const element access. + const PBQPNum& operator[](unsigned index) const { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Add another vector to this one. 
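+  ///
+  /// An illustrative use (the values are arbitrary):
+  /// \code
+  ///   Vector a(2, 1.0);  // [ 1, 1 ]
+  ///   Vector b(2, 2.0);  // [ 2, 2 ]
+  ///   a += b;            // a is now [ 3, 3 ]
+  /// \endcode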
+ Vector& operator+=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); + return *this; + } + + /// \brief Subtract another vector from this one. + Vector& operator-=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); + return *this; + } + + /// \brief Returns the index of the minimum value in this vector + unsigned minIndex() const { + return std::min_element(data, data + length) - data; + } + + private: + unsigned length; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given vector on the given +/// output stream. +template <typename OStream> +OStream& operator<<(OStream &os, const Vector &v) { + assert((v.getLength() != 0) && "Zero-length vector badness."); + + os << "[ " << v[0]; + for (unsigned i = 1; i < v.getLength(); ++i) { + os << ", " << v[i]; + } + os << " ]"; + + return os; +} + + +/// \brief PBQP Matrix class +class Matrix { + public: + + /// \brief Construct a PBQP Matrix with the given dimensions. + Matrix(unsigned rows, unsigned cols) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + } + + /// \brief Construct a PBQP Matrix with the given dimensions and initial + /// value. + Matrix(unsigned rows, unsigned cols, PBQPNum initVal) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + std::fill(data, data + (rows * cols), initVal); + } + + /// \brief Copy construct a PBQP matrix. + Matrix(const Matrix &m) : + rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { + std::copy(m.data, m.data + (rows * cols), data); + } + + /// \brief Destroy this matrix, return its memory. + ~Matrix() { delete[] data; } + + /// \brief Assignment operator. + Matrix& operator=(const Matrix &m) { + delete[] data; + rows = m.rows; cols = m.cols; + data = new PBQPNum[rows * cols]; + std::copy(m.data, m.data + (rows * cols), data); + return *this; + } + + /// \brief Return the number of rows in this matrix. + unsigned getRows() const { return rows; } + + /// \brief Return the number of cols in this matrix. + unsigned getCols() const { return cols; } + + /// \brief Matrix element access. + PBQPNum* operator[](unsigned r) { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Matrix element access. + const PBQPNum* operator[](unsigned r) const { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Returns the given row as a vector. + Vector getRowAsVector(unsigned r) const { + Vector v(cols); + for (unsigned c = 0; c < cols; ++c) + v[c] = (*this)[r][c]; + return v; + } + + /// \brief Returns the given column as a vector. + Vector getColAsVector(unsigned c) const { + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][c]; + return v; + } + + /// \brief Reset the matrix to the given value. + Matrix& reset(PBQPNum val = 0) { + std::fill(data, data + (rows * cols), val); + return *this; + } + + /// \brief Set a single row of this matrix to the given value. + Matrix& setRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds."); + std::fill(data + (r * cols), data + ((r + 1) * cols), val); + return *this; + } + + /// \brief Set a single column of this matrix to the given value. 
+ Matrix& setCol(unsigned c, PBQPNum val) { + assert(c < cols && "Column out of bounds."); + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] = val; + return *this; + } + + /// \brief Matrix transpose. + Matrix transpose() const { + Matrix m(cols, rows); + for (unsigned r = 0; r < rows; ++r) + for (unsigned c = 0; c < cols; ++c) + m[c][r] = (*this)[r][c]; + return m; + } + + /// \brief Returns the diagonal of the matrix as a vector. + /// + /// Matrix must be square. + Vector diagonalize() const { + assert(rows == cols && "Attempt to diagonalize non-square matrix."); + + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][r]; + return v; + } + + /// \brief Add the given matrix to this one. + Matrix& operator+=(const Matrix &m) { + assert(rows == m.rows && cols == m.cols && + "Matrix dimensions mismatch."); + std::transform(data, data + (rows * cols), m.data, data, + std::plus<PBQPNum>()); + return *this; + } + + /// \brief Returns the minimum of the given row + PBQPNum getRowMin(unsigned r) const { + assert(r < rows && "Row out of bounds"); + return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); + } + + /// \brief Returns the minimum of the given column + PBQPNum getColMin(unsigned c) const { + PBQPNum minElem = (*this)[0][c]; + for (unsigned r = 1; r < rows; ++r) + if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; + return minElem; + } + + /// \brief Subtracts the given scalar from the elements of the given row. + Matrix& subFromRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds"); + std::transform(data + (r * cols), data + ((r + 1) * cols), + data + (r * cols), + std::bind2nd(std::minus<PBQPNum>(), val)); + return *this; + } + + /// \brief Subtracts the given scalar from the elements of the given column. + Matrix& subFromCol(unsigned c, PBQPNum val) { + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] -= val; + return *this; + } + + /// \brief Returns true if this is a zero matrix. + bool isZero() const { + return find_if(data, data + (rows * cols), + std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) == + data + (rows * cols); + } + + private: + unsigned rows, cols; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given matrix on the given +/// output stream. +template <typename OStream> +OStream& operator<<(OStream &os, const Matrix &m) { + + assert((m.getRows() != 0) && "Zero-row matrix badness."); + + for (unsigned i = 0; i < m.getRows(); ++i) { + os << m.getRowAsVector(i); + } + + return os; +} + +} + +#endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/contrib/llvm/lib/CodeGen/PBQP/Solution.h b/contrib/llvm/lib/CodeGen/PBQP/Solution.h new file mode 100644 index 0000000..047fd04 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PBQP/Solution.h @@ -0,0 +1,89 @@ +//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PBQP Solution class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H +#define LLVM_CODEGEN_PBQP_SOLUTION_H + +#include "Math.h" +#include "Graph.h" + +#include <map> + +namespace PBQP { + + /// \brief Represents a solution to a PBQP problem. + /// + /// To get the selection for each node in the problem use the getSelection method. 
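+  ///
+  /// For example (an illustrative sketch; g is assumed to be a previously
+  /// built Graph and nItr one of its node iterators):
+  /// \code
+  ///   Solution s = HeuristicSolver<Heuristics::Briggs>::solve(g);
+  ///   unsigned selection = s.getSelection(nItr);
+  /// \endcode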
+  class Solution {
+  private:
+
+    typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
+    SelectionsMap selections;
+
+    unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
+
+  public:
+
+    /// \brief Construct an empty solution. The reduction counters are
+    /// incremented by the record* methods below, so they must not start
+    /// out uninitialized.
+    Solution()
+      : r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
+
+    /// \brief Number of nodes for which selections have been made.
+    /// @return Number of nodes for which selections have been made.
+    unsigned numNodes() const { return selections.size(); }
+
+    /// \brief Records a reduction via the R0 rule. Should be called from the
+    /// solver only.
+    void recordR0() { ++r0Reductions; }
+
+    /// \brief Returns the number of R0 reductions applied to solve the problem.
+    unsigned numR0Reductions() const { return r0Reductions; }
+
+    /// \brief Records a reduction via the R1 rule. Should be called from the
+    /// solver only.
+    void recordR1() { ++r1Reductions; }
+
+    /// \brief Returns the number of R1 reductions applied to solve the problem.
+    unsigned numR1Reductions() const { return r1Reductions; }
+
+    /// \brief Records a reduction via the R2 rule. Should be called from the
+    /// solver only.
+    void recordR2() { ++r2Reductions; }
+
+    /// \brief Returns the number of R2 reductions applied to solve the problem.
+    unsigned numR2Reductions() const { return r2Reductions; }
+
+    /// \brief Records a reduction via the RN rule. Should be called from the
+    /// solver only.
+    void recordRN() { ++rNReductions; }
+
+    /// \brief Returns the number of RN reductions applied to solve the problem.
+    unsigned numRNReductions() const { return rNReductions; }
+
+    /// \brief Set the selection for a given node.
+    /// @param nItr Node iterator.
+    /// @param selection Selection for nItr.
+    void setSelection(Graph::NodeItr nItr, unsigned selection) {
+      selections[nItr] = selection;
+    }
+
+    /// \brief Get a node's selection.
+    /// @param nItr Node iterator.
+    /// @return The selection for nItr.
+    unsigned getSelection(Graph::NodeItr nItr) const {
+      SelectionsMap::const_iterator sItr = selections.find(nItr);
+      assert(sItr != selections.end() && "No selection for node.");
+      return sItr->second;
+    }
+
+  };
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..d4df4c5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,412 @@
+//===-- PHIElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
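+//
+// For example (illustrative only), a machine PHI such as
+//
+//   %reg1026 = PHI %reg1024, <BB#1>, %reg1025, <BB#2>
+//
+// is lowered to a fresh "join" vreg that each predecessor copies into,
+// plus a copy at the top of the current block:
+//
+//   BB#1:   %reg1027 = COPY %reg1024
+//   BB#2:   %reg1027 = COPY %reg1025
+//   here:   %reg1026 = COPY %reg1027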
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phielim" +#include "PHIElimination.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include <algorithm> +#include <map> +using namespace llvm; + +STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumReused, "Number of reused lowered phis"); + +char PHIElimination::ID = 0; +INITIALIZE_PASS(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false); + +char &llvm::PHIEliminationID = PHIElimination::ID; + +void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<LiveVariables>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + + bool Changed = false; + + // Split critical edges to help the coalescer + if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + } + + // Populate VRegPHIUseCount + analyzePHINodes(MF); + + // Eliminate PHI instructions by inserting copies into predecessor blocks. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= EliminatePHINodes(MF, *I); + + // Remove dead IMPLICIT_DEF instructions. + for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(), + E = ImpDefs.end(); I != E; ++I) { + MachineInstr *DefMI = *I; + unsigned DefReg = DefMI->getOperand(0).getReg(); + if (MRI->use_nodbg_empty(DefReg)) + DefMI->eraseFromParent(); + } + + // Clean up the lowered PHI instructions. + for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); + I != E; ++I) + MF.DeleteMachineInstr(I->first); + + LoweredPHIs.clear(); + ImpDefs.clear(); + VRegPHIUseCount.clear(); + + return Changed; +} + +/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in +/// predecessor basic blocks. +/// +bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (MBB.empty() || !MBB.front().isPHI()) + return false; // Quick exit for basic blocks without PHIs. + + // Get an iterator to the first instruction after the last PHI node (this may + // also be the end of the basic block). + MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin()); + + while (MBB.front().isPHI()) + LowerAtomicPHINode(MBB, AfterPHIsIt); + + return true; +} + +/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node +/// are implicit_def's. 
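+///
+/// A machine PHI's operands are laid out as (def, src0, pred0, src1, pred1,
+/// ...), for example:
+///
+///   operand 0: %reg1026 (def)
+///   operand 1: %reg1024    operand 2: <BB#1>
+///   operand 3: %reg1025    operand 4: <BB#2>
+///
+/// which is why this function (and the loops below) step by two, starting
+/// at operand 1.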
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+                                         const MachineRegisterInfo *MRI) {
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+    unsigned SrcReg = MPhi->getOperand(i).getReg();
+    const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+    if (!DefMI || !DefMI->isImplicitDef())
+      return false;
+  }
+  return true;
+}
+
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+                                          MachineBasicBlock &SuccMBB,
+                                          unsigned SrcReg) {
+  // Handle the trivial case trivially.
+  if (MBB.empty())
+    return MBB.begin();
+
+  // Usually, we just want to insert the copy before the first terminator
+  // instruction. However, for the edge going to a landing pad, we must insert
+  // the copy before the call/invoke instruction.
+  if (!SuccMBB.isLandingPad())
+    return MBB.getFirstTerminator();
+
+  // Discover any defs/uses in this basic block.
+  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *DefUseMI = &*RI;
+    if (DefUseMI->getParent() == &MBB)
+      DefUsesInMBB.insert(DefUseMI);
+  }
+
+  MachineBasicBlock::iterator InsertPoint;
+  if (DefUsesInMBB.empty()) {
+    // No defs. Insert the copy at the start of the basic block.
+    InsertPoint = MBB.begin();
+  } else if (DefUsesInMBB.size() == 1) {
+    // Insert the copy immediately after the def/use.
+    InsertPoint = *DefUsesInMBB.begin();
+    ++InsertPoint;
+  } else {
+    // Insert the copy immediately after the last def/use.
+    InsertPoint = MBB.end();
+    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+    ++InsertPoint;
+  }
+
+  // Make sure the copy goes after any phi nodes however.
+  return SkipPHIsAndLabels(MBB, InsertPoint);
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
+void llvm::PHIElimination::LowerAtomicPHINode(
+                                      MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator AfterPHIsIt) {
+  ++NumAtomic;
+  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+  MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+  unsigned DestReg = MPhi->getOperand(0).getReg();
+  assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+  bool isDead = MPhi->getOperand(0).isDead();
+
+  // Create a new register for the incoming PHI arguments.
+  MachineFunction &MF = *MBB.getParent();
+  unsigned IncomingReg = 0;
+  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?
+
+  // Insert a register to register copy at the top of the current block (but
+  // after any remaining phi nodes) which copies the new incoming register
+  // into the phi node destination.
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  if (isSourceDefinedByImplicitDef(MPhi, MRI))
+    // If all sources of a PHI node are implicit_def, just emit an
+    // implicit_def instead of a copy.
+    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+            TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+  else {
+    // Can we reuse an earlier PHI node?
This only happens for critical edges,
+    // typically those created by tail duplication.
+    unsigned &entry = LoweredPHIs[MPhi];
+    if (entry) {
+      // An identical PHI node was already lowered. Reuse the incoming
+      // register.
+      IncomingReg = entry;
+      reusedIncoming = true;
+      ++NumReused;
+      DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+    } else {
+      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+    }
+    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+            TII->get(TargetOpcode::COPY), DestReg)
+      .addReg(IncomingReg);
+  }
+
+  // Update live variable information if there is any.
+  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+  if (LV) {
+    MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+    if (IncomingReg) {
+      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+      // Increment use count of the newly created virtual register.
+      VI.NumUses++;
+      LV->setPHIJoin(IncomingReg);
+
+      // When we are reusing the incoming register, it may already have been
+      // killed in this block. The old kill will also have been inserted at
+      // AfterPHIsIt, so it appears before the current PHICopy.
+      if (reusedIncoming)
+        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+          DEBUG(MBB.dump());
+        }
+
+      // Add information to LiveVariables to know that the incoming value is
+      // killed. Note that because the value is defined in several places
+      // (once for each incoming block), the "def" block and instruction
+      // fields of the VarInfo are not filled in.
+      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+    }
+
+    // Since we are going to be deleting the PHI node, if it is the last use of
+    // any registers, or if the value itself is dead, we need to move this
+    // information over to the new copy we just inserted.
+    LV->removeVirtualRegistersKilled(MPhi);
+
+    // If the result is dead, update LV.
+    if (isDead) {
+      LV->addVirtualRegisterDead(DestReg, PHICopy);
+      LV->removeVirtualRegisterDead(DestReg, MPhi);
+    }
+  }
+
+  // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+                                 MPhi->getOperand(i).getReg())];
+
+  // Now loop over all of the incoming arguments, changing them to copy into
+  // the IncomingReg register in the corresponding predecessor basic block.
+  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+  for (int i = NumSrcs - 1; i >= 0; --i) {
+    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
+    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+           "Machine PHI Operands must all be virtual registers!");
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path of the PHI.
+    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+    // If source is defined by an implicit def, there is no need to insert a
+    // copy.
+    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+    if (DefMI->isImplicitDef()) {
+      ImpDefs.insert(DefMI);
+      continue;
+    }
+
+    // Check to make sure we haven't already emitted the copy for this block.
+    // This can happen because PHI nodes may have multiple entries for the same
+    // basic block.
+    if (!MBBsInsertedInto.insert(&opBlock))
+      continue;  // If the copy has already been emitted, we're done.
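+    // (A PHI can list the same predecessor more than once -- e.g. when two
+    // switch cases branch to the same block -- which is why the
+    // MBBsInsertedInto guard above is needed.)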
+
+    // Find a safe location to insert the copy; this may be the first
+    // terminator in the block (or end()).
+    MachineBasicBlock::iterator InsertPos =
+      FindCopyInsertPoint(opBlock, MBB, SrcReg);
+
+    // Insert the copy.
+    if (!reusedIncoming && IncomingReg)
+      BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), IncomingReg)
+        .addReg(SrcReg, 0, SrcSubReg);
+
+    // Now update live variable information if we have it. Otherwise we're done.
+    if (!LV) continue;
+
+    // We want to be able to insert a kill of the register if this PHI (aka, the
+    // copy we just inserted) is the last use of the source value. Live
+    // variable analysis conservatively handles this by saying that the value is
+    // live until the end of the block the PHI entry lives in. If the value
+    // really is dead at the PHI copy, there will be no successor blocks which
+    // have the value live-in.
+
+    // Also check to see if this register is in use by another PHI node which
+    // has not yet been eliminated. If so, it will be killed at an appropriate
+    // point later.
+
+    // Is it used by any PHI instructions in this block?
+    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
+
+    // Okay, if we now know that the value is not live out of the block, we can
+    // add a kill marker in this block saying that it kills the incoming value!
+    if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
+      // In our final twist, we have to decide which instruction kills the
+      // register. In most cases this is the copy, however, the first
+      // terminator instruction at the end of the block may also use the value.
+      // In this case, we should mark *it* as the killing instruction, not the
+      // copy.
+      MachineBasicBlock::iterator KillInst;
+      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
+        KillInst = Term;
+
+        // Check that no other terminators read the register.
+#ifndef NDEBUG
+        for (MachineBasicBlock::iterator TI = llvm::next(Term);
+             TI != opBlock.end(); ++TI) {
+          assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless"
+                 " they are the first terminator in a block!");
+        }
+#endif
+      } else if (reusedIncoming || !IncomingReg) {
+        // We may have to rewind a bit if we didn't insert a copy this time.
+        KillInst = Term;
+        while (KillInst != opBlock.begin())
+          if ((--KillInst)->readsRegister(SrcReg))
+            break;
+      } else {
+        // We just inserted this copy.
+        KillInst = prior(InsertPos);
+      }
+      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+      // Finally, mark it killed.
+      LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+      // This vreg no longer lives all of the way through opBlock.
+      unsigned opBlockNum = opBlock.getNumber();
+      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+    }
+  }
+
+  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+  if (reusedIncoming || !IncomingReg)
+    MF.DeleteMachineInstr(MPhi);
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we count the uses of each virtual register appearing in a PHI
+/// node, keyed by the predecessor block the value flows in from. This is
+/// used later to determine when the vreg is killed in the BB.
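+/// For instance, the operand pair (%reg1024, <BB#1>) in a PHI increments
+/// VRegPHIUseCount[(BB#1's number, %reg1024)] by one.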
+///
+void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+                                     BBI->getOperand(i).getReg())];
+}
+
+bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
+                                         MachineBasicBlock &MBB,
+                                         LiveVariables &LV,
+                                         MachineLoopInfo *MLI) {
+  if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+    return false;   // Quick exit for basic blocks without PHIs.
+
+  bool Changed = false;
+  for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+       BBI != BBE && BBI->isPHI(); ++BBI) {
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+      unsigned Reg = BBI->getOperand(i).getReg();
+      MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+      // We break edges when registers are live out from the predecessor block
+      // (not considering PHI nodes). If the register is live in to this block
+      // anyway, we would gain nothing from splitting.
+      // Avoid splitting backedges of loops. It would introduce small
+      // out-of-line blocks into the loop which is very bad for code placement.
+      if (PreMBB != &MBB &&
+          !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+        if (!MLI ||
+            !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+              MLI->isLoopHeader(&MBB)))
+          Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+      }
+    }
+  }
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.h b/contrib/llvm/lib/CodeGen/PHIElimination.h
new file mode 100644
index 0000000..45a9718
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.h
@@ -0,0 +1,115 @@
+//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
+#define LLVM_CODEGEN_PHIELIMINATION_HPP
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+  class LiveVariables;
+  class MachineRegisterInfo;
+  class MachineLoopInfo;
+
+  /// Lower PHI instructions to copies.
+  class PHIElimination : public MachineFunctionPass {
+    MachineRegisterInfo *MRI; // Machine register information
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PHIElimination() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in
+    /// here. In particular, we want to map the number of uses of a virtual
+    /// register which is used in a PHI node. We map that to the BB the
+    /// vreg is coming from.
This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + LiveVariables &LV, MachineLoopInfo *MLI); + + /// SplitCriticalEdge - Split a critical edge from A to B by + /// inserting a new MBB. Update branches in A and PHI instructions + /// in B. Return the new block. + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A, + MachineBasicBlock *B); + + /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. + MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, + MachineBasicBlock &SuccMBB, + unsigned SrcReg); + + // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and + // also after any exception handling labels: in landing pads execution + // starts at the label, so any copies placed before it won't be executed! + // We also deal with DBG_VALUEs, which are a bit tricky: + // PHI + // DBG_VALUE + // LABEL + // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it + // needs to be annulled or, better, moved to follow the label, as well. + // PHI + // DBG_VALUE + // no label + // Here it is not a good idea to skip the DBG_VALUE. + // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and + // maximally stupid. + MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + // Rather than assuming that EH labels come before other kinds of labels, + // just skip all labels. + while (I != MBB.end() && + (I->isPHI() || I->isLabel() || I->isDebugValue())) { + if (I->isDebugValue() && I->getNumOperands()==3 && + I->getOperand(0).isReg()) + I->getOperand(0).setReg(0U); + ++I; + } + return I; + } + + typedef std::pair<unsigned, unsigned> BBVRegPair; + typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr*, 4> ImpDefs; + + // Map reusable lowered PHI node -> incoming join register. + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; + LoweredPHIMap LoweredPHIs; + }; + +} + +#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */ diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp new file mode 100644 index 0000000..3489db2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -0,0 +1,68 @@ +//===-- Passes.cpp - Target independent code generation passes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces to access the target independent code +// generation passes provided by the LLVM backend. +// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +//===---------------------------------------------------------------------===// +/// +/// RegisterRegAlloc class - Track the registration of register allocators. 
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+                "pick register allocator based on -O option",
+                createDefaultRegisterAllocator);
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+               RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+         cl::init(&createDefaultRegisterAllocator),
+         cl::desc("Register allocator to use"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+  RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+  if (!Ctor) {
+    Ctor = RegAlloc;
+    RegisterRegAlloc::setDefault(RegAlloc);
+  }
+
+  if (Ctor != createDefaultRegisterAllocator)
+    return Ctor();
+
+  // When the 'default' allocator is requested, pick one based on OptLevel.
+  switch (OptLevel) {
+  case CodeGenOpt::None:
+    return createFastRegisterAllocator();
+  default:
+    return createLinearScanRegisterAllocator();
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 0000000..17cee46
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,287 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the result.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+//   sub r1, 1
+//   cmp r1, 0
+//   bz  L1
+//
+// If the "sub" instruction already sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
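+//
+// leaving just:
+//
+//   sub r1, 1
+//   bz  L1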
+
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+           cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumEliminated, "Number of compares eliminated");
+
+namespace {
+  class PeepholeOptimizer : public MachineFunctionPass {
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo *MRI;
+    MachineDominatorTree *DT;  // Machine dominator tree
+
+  public:
+    static char ID; // Pass identification
+    PeepholeOptimizer() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      if (Aggressive) {
+        AU.addRequired<MachineDominatorTree>();
+        AU.addPreserved<MachineDominatorTree>();
+      }
+    }
+
+  private:
+    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+  };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
+                "Peephole Optimizations", false, false);
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+  return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
+/// reads a single register and writes a single register and it does not modify
+/// the source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of the
+/// result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+  LocalMIs.insert(MI);
+
+  unsigned SrcReg, DstReg, SubIdx;
+  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+    return false;
+
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg))
+    return false;
+
+  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+  if (++UI == MRI->use_nodbg_end())
+    // No other uses.
+    return false;
+
+  // The source has other uses. See if we can replace the other uses with use of
+  // the result of the extension.
+  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+  UI = MRI->use_nodbg_begin(DstReg);
+  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+       UI != UE; ++UI)
+    ReachedBBs.insert(UI->getParent());
+
+  // Uses that are in the same BB as uses of the result of the instruction.
+  SmallVector<MachineOperand*, 8> Uses;
+
+  // Uses that the result of the instruction can reach.
+  SmallVector<MachineOperand*, 8> ExtendedUses;
+
+  bool ExtendLife = true;
+  UI = MRI->use_nodbg_begin(SrcReg);
+  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+       UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = &*UI;
+    if (UseMI == MI)
+      continue;
+
+    if (UseMI->isPHI()) {
+      ExtendLife = false;
+      continue;
+    }
+
+    // It's an error to translate this:
+    //
+    //    %reg1025 = <sext> %reg1024
+    //     ...
+    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+    //
+    // into this:
+    //
+    //    %reg1025 = <sext> %reg1024
+    //     ...
+    //    %reg1027 = COPY %reg1025:4
+    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+    //
+    // The problem here is that SUBREG_TO_REG is there to assert that an
+    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+    // the COPY here, it will give us the value after the <sext>, not the
+    // original value of %reg1024 before <sext>.
+    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+      continue;
+
+    MachineBasicBlock *UseMBB = UseMI->getParent();
+    if (UseMBB == MBB) {
+      // Local uses that come after the extension.
+      if (!LocalMIs.count(UseMI))
+        Uses.push_back(&UseMO);
+    } else if (ReachedBBs.count(UseMBB)) {
+      // Non-local uses where the result of the extension is used. Always
+      // replace these unless it's a PHI.
+      Uses.push_back(&UseMO);
+    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+      // We may want to extend the live range of the extension result in order
+      // to replace these uses.
+      ExtendedUses.push_back(&UseMO);
+    } else {
+      // Both will be live out of the def MBB anyway. Don't extend the live
+      // range of the extension result.
+      ExtendLife = false;
+      break;
+    }
+  }
+
+  if (ExtendLife && !ExtendedUses.empty())
+    // Extend the liveness of the extension result.
+    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+              std::back_inserter(Uses));
+
+  // Now replace all uses.
+  bool Changed = false;
+  if (!Uses.empty()) {
+    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+    // Look for PHI uses of the extended result; we don't want to extend the
+    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
+    // A PHI use is expected to be the kill of its source values.
+    UI = MRI->use_nodbg_begin(DstReg);
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+      if (UI->isPHI())
+        PHIBBs.insert(UI->getParent());
+
+    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+      MachineOperand *UseMO = Uses[i];
+      MachineInstr *UseMI = UseMO->getParent();
+      MachineBasicBlock *UseMBB = UseMI->getParent();
+      if (PHIBBs.count(UseMBB))
+        continue;
+
+      unsigned NewVR = MRI->createVirtualRegister(RC);
+      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), NewVR)
+        .addReg(DstReg, 0, SubIdx);
+
+      UseMO->setReg(NewVR);
+      ++NumReuse;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and use
+/// the flag from the previous instruction.
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+                                         MachineBasicBlock *MBB) {
+  // If this instruction is a comparison against zero and isn't comparing a
+  // physical register, we can try to optimize it.
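+  // For example (illustrative), on a target whose SUB can set a zero flag,
+  //
+  //    %reg1024 = SUB %reg1024, 1
+  //    CMP %reg1024, 0
+  //
+  // lets ConvertToSetZeroFlag below rewrite the SUB into its flag-setting
+  // form and delete the CMP entirely.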
+  unsigned SrcReg;
+  int CmpValue;
+  if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
+    return false;
+
+  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+  if (llvm::next(DI) != MRI->def_end())
+    // Only support one definition.
+    return false;
+
+  // Attempt to convert the defining instruction to set the "zero" flag.
+  if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+    ++NumEliminated;
+    return true;
+  }
+
+  return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+  bool Changed = false;
+
+  SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    LocalMIs.clear();
+
+    for (MachineBasicBlock::iterator
+           MII = I->begin(), ME = I->end(); MII != ME; ) {
+      MachineInstr *MI = &*MII;
+
+      if (MI->getDesc().isCompare() &&
+          !MI->getDesc().hasUnmodeledSideEffects()) {
+        ++MII; // The iterator may become invalid if the compare is deleted.
+        Changed |= OptimizeCmpInstr(MI, MBB);
+      } else {
+        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+        ++MII;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
new file mode 100644
index 0000000..cbde2b0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -0,0 +1,180 @@
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a hazard recognizer using the instruction itineraries
+// defined for the current target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+
+using namespace llvm;
+
+PostRAHazardRecognizer::
+PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
+  ScheduleHazardRecognizer(), ItinData(LItinData) {
+  // Determine the maximum depth of any itinerary. This determines the
+  // depth of the scoreboard. We always make the scoreboard at least 1
+  // cycle deep to avoid dealing with the boundary condition.
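+  // For example (illustrative), an itinerary whose stages take 2, 1 and 3
+  // cycles gives ItinDepth == 6 below, so the scoreboard must be able to
+  // look at least six cycles into the future.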
+ unsigned ScoreboardDepth = 1; + if (!ItinData.isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData.isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData.beginStage(idx); + const InstrStage *E = ItinData.endStage(idx); + unsigned ItinDepth = 0; + for (; IS != E; ++IS) + ItinDepth += IS->getCycles(); + + ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + } + } + + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); + + DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " + << ScoreboardDepth << '\n'); +} + +void PostRAHazardRecognizer::Reset() { + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); +} + +void PostRAHazardRecognizer::ScoreBoard::dump() const { + dbgs() << "Scoreboard:\n"; + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = (*this)[i]; + dbgs() << "\t"; + for (int j = 31; j >= 0; j--) + dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + dbgs() << '\n'; + } +} + +ScheduleHazardRecognizer::HazardType +PostRAHazardRecognizer::getHazardType(SUnit *SU) { + if (ItinData.isEmpty()) + return NoHazard; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + if (!freeUnits) { + DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + DEBUG(SU->getInstr()->dump()); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { + if (ItinData.isEmpty()) + return; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. 
+    for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+             "Scoreboard depth exceeded!");
+
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+        assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[cycle + i];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[cycle + i];
+        break;
+      }
+
+      // Reduce to a single unit: repeatedly clear the lowest set bit; the
+      // last non-zero value has exactly one bit set, the highest free unit.
+      unsigned freeUnit = 0;
+      do {
+        freeUnit = freeUnits;
+        freeUnits = freeUnit & (freeUnit - 1);
+      } while (freeUnits);
+
+      assert(freeUnit && "No function unit available!");
+      if (IS->getReservationKind() == InstrStage::Required)
+        RequiredScoreboard[cycle + i] |= freeUnit;
+      else
+        ReservedScoreboard[cycle + i] |= freeUnit;
+    }
+
+    // Advance the cycle to the next stage.
+    cycle += IS->getNextCycles();
+  }
+
+  DEBUG(ReservedScoreboard.dump());
+  DEBUG(RequiredScoreboard.dump());
+}
+
+void PostRAHazardRecognizer::AdvanceCycle() {
+  ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+  RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..f0bd6d1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,694 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
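+//
+// In outline (an illustrative sketch of the main loop below, not additional
+// behavior):
+//
+//   while (available or pending nodes remain) {
+//     move pending nodes whose depth <= current cycle to available;
+//     pop the highest-priority available node with no hazard;
+//     if (found)  schedule it and release its successors;
+//     else        advance the cycle (stall) or emit a noop;
+//   }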
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "AntiDepBreaker.h" +#include "AggressiveAntiDepBreaker.h" +#include "CriticalAntiDepBreaker.h" +#include "ScheduleDAGInstrs.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Statistic.h" +#include <set> +using namespace llvm; + +STATISTIC(NumNoops, "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); +STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); + +// Post-RA scheduling is enabled with +// TargetSubtarget.enablePostRAScheduler(). This flag can be used to +// override the target. +static cl::opt<bool> +EnablePostRAScheduler("post-RA-scheduler", + cl::desc("Enable scheduling after register allocation"), + cl::init(false), cl::Hidden); +static cl::opt<std::string> +EnableAntiDepBreaking("break-anti-dependencies", + cl::desc("Break post-RA scheduling anti-dependencies: " + "\"critical\", \"all\", or \"none\""), + cl::init("none"), cl::Hidden); + +// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod +static cl::opt<int> +DebugDiv("postra-sched-debugdiv", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); +static cl::opt<int> +DebugMod("postra-sched-debugmod", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); + +AntiDepBreaker::~AntiDepBreaker() { } + +namespace { + class PostRAScheduler : public MachineFunctionPass { + AliasAnalysis *AA; + const TargetInstrInfo *TII; + CodeGenOpt::Level OptLevel; + + public: + static char ID; + PostRAScheduler(CodeGenOpt::Level ol) : + MachineFunctionPass(ID), OptLevel(ol) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Post RA top-down list latency scheduler"; + } + + bool runOnMachineFunction(MachineFunction &Fn); + }; + char PostRAScheduler::ID = 0; + + class SchedulePostRATDList : public ScheduleDAGInstrs { + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + LatencyPriorityQueue AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// Topo - A topological ordering for SUnits. 
+    ScheduleDAGTopologicalSort Topo;
+
+    /// HazardRec - The hazard recognizer to use.
+    ScheduleHazardRecognizer *HazardRec;
+
+    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none.
+    AntiDepBreaker *AntiDepBreak;
+
+    /// AA - AliasAnalysis for making memory reference queries.
+    AliasAnalysis *AA;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
+    std::vector<unsigned> KillIndices;
+
+  public:
+    SchedulePostRATDList(MachineFunction &MF,
+                         const MachineLoopInfo &MLI,
+                         const MachineDominatorTree &MDT,
+                         ScheduleHazardRecognizer *HR,
+                         AntiDepBreaker *ADB,
+                         AliasAnalysis *aa)
+      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
+        HazardRec(HR), AntiDepBreak(ADB), AA(aa),
+        KillIndices(TRI->getNumRegs()) {}
+
+    ~SchedulePostRATDList() {
+    }
+
+    /// StartBlock - Initialize register live-range state for scheduling in
+    /// this block.
+    ///
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// Schedule - Schedule the instruction range using list scheduling.
+    ///
+    void Schedule();
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count);
+
+    /// FinishBlock - Clean up register live-range state.
+    ///
+    void FinishBlock();
+
+    /// FixupKills - Fix register kill flags that have been made
+    /// invalid due to scheduling.
+    ///
+    void FixupKills(MachineBasicBlock *MBB);
+
+  private:
+    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+    void ReleaseSuccessors(SUnit *SU);
+    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+    void ListScheduleTopDown();
+    void StartBlockForKills(MachineBasicBlock *BB);
+
+    // ToggleKillFlag - Toggle a register operand kill flag. Other
+    // adjustments may be made to the instruction if necessary. Return
+    // true if the operand has been deleted, false if not.
+    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+  };
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+  AA = &getAnalysis<AliasAnalysis>();
+  TII = Fn.getTarget().getInstrInfo();
+
+  // Check for explicit enable/disable of post-ra scheduling.
+  TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+  SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+  if (EnablePostRAScheduler.getPosition() > 0) {
+    if (!EnablePostRAScheduler)
+      return false;
+  } else {
+    // Check that post-RA scheduling is enabled for this target.
+    const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+    if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+      return false;
+  }
+
+  // Check for antidep breaking override...
+  if (EnableAntiDepBreaking.getPosition() > 0) {
+    AntiDepMode = (EnableAntiDepBreaking == "all") ?
+      TargetSubtarget::ANTIDEP_ALL :
+        (EnableAntiDepBreaking == "critical")
+           ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+  }
+
+  DEBUG(dbgs() << "PostRAScheduler\n");
+
+  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  const TargetMachine &TM = Fn.getTarget();
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  ScheduleHazardRecognizer *HR =
+    TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
+  AntiDepBreaker *ADB =
+    ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); + + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { +#ifndef NDEBUG + // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int bbcnt = 0; + if (bbcnt++ % DebugDiv != DebugMod) + continue; + dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << + ":BB#" << MBB->getNumber() << " ***\n"; + } +#endif + + // Initialize register live-range state for scheduling in this block. + Scheduler.StartBlock(MBB); + + // Schedule each sequence of instructions not interrupted by a label + // or anything else that effectively needs to shut down scheduling. + MachineBasicBlock::iterator Current = MBB->end(); + unsigned Count = MBB->size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { + MachineInstr *MI = llvm::prior(I); + if (TII->isSchedulingBoundary(MI, MBB, Fn)) { + Scheduler.Run(MBB, I, Current, CurrentCount); + Scheduler.EmitSchedule(); + Current = MI; + CurrentCount = Count - 1; + Scheduler.Observe(MI, CurrentCount); + } + I = MI; + --Count; + } + assert(Count == 0 && "Instruction count mismatch!"); + assert((MBB->begin() == Current || CurrentCount != 0) && + "Instruction count mismatch!"); + Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.EmitSchedule(); + + // Clean up register live-range state. + Scheduler.FinishBlock(); + + // Update register kills + Scheduler.FixupKills(MBB); + } + + delete HR; + delete ADB; + + return true; +} + +/// StartBlock - Initialize register live-range state for scheduling in +/// this block. +/// +void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { + // Call the superclass. + ScheduleDAGInstrs::StartBlock(BB); + + // Reset the hazard recognizer and anti-dep breaker. + HazardRec->Reset(); + if (AntiDepBreak != NULL) + AntiDepBreak->StartBlock(BB); +} + +/// Schedule - Schedule the instruction range using list scheduling. +/// +void SchedulePostRATDList::Schedule() { + // Build the scheduling graph. + BuildSchedGraph(AA); + + if (AntiDepBreak != NULL) { + unsigned Broken = + AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, + InsertPosIndex); + + if (Broken != 0) { + // We made changes. Update the dependency graph. + // Theoretically we could update the graph in place: + // When a live range is changed to use a different register, remove + // the def's anti-dependence *and* output-dependence edges due to + // that register, and add new anti-dependence and output-dependence + // edges based on the next live range of the register. + SUnits.clear(); + Sequence.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); + BuildSchedGraph(AA); + + NumFixedAnti += Broken; + } + } + + DEBUG(dbgs() << "********** List Scheduling **********\n"); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + AvailableQueue.initNodes(SUnits); + ListScheduleTopDown(); + AvailableQueue.releaseState(); +} + +/// Observe - Update liveness information to account for the current +/// instruction, which will not be scheduled. 
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+  if (AntiDepBreak != NULL)
+    AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+  if (AntiDepBreak != NULL)
+    AntiDepBreak->FinishBlock();
+
+  // Call the superclass.
+  ScheduleDAGInstrs::FinishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating
+/// kills.
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+  // Initialize the indices to indicate that no registers are live.
+  for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+    KillIndices[i] = ~0u;
+
+  // Determine the live-out physregs for this block.
+  if (!BB->empty() && BB->back().getDesc().isReturn()) {
+    // In a return block, examine the function live-out regs.
+    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+           E = MRI.liveout_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      KillIndices[Reg] = BB->size();
+      // Repeat, for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = BB->size();
+      }
+    }
+  }
+  else {
+    // In a non-return block, examine the live-in regs of all successors.
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI) {
+      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+             E = (*SI)->livein_end(); I != E; ++I) {
+        unsigned Reg = *I;
+        KillIndices[Reg] = BB->size();
+        // Repeat, for all subregs.
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg) {
+          KillIndices[*Subreg] = BB->size();
+        }
+      }
+    }
+  }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+                                          MachineOperand &MO) {
+  // Setting kill flag...
+  if (!MO.isKill()) {
+    MO.setIsKill(true);
+    return false;
+  }
+
+  // If MO itself is live, clear the kill flag...
+  if (KillIndices[MO.getReg()] != ~0u) {
+    MO.setIsKill(false);
+    return false;
+  }
+
+  // If any subreg of MO is live, then create an imp-def for that
+  // subreg and keep MO marked as killed.
+  MO.setIsKill(false);
+  bool AllDead = true;
+  const unsigned SuperReg = MO.getReg();
+  for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+       *Subreg; ++Subreg) {
+    if (KillIndices[*Subreg] != ~0u) {
+      MI->addOperand(MachineOperand::CreateReg(*Subreg,
+                                               true  /*IsDef*/,
+                                               true  /*IsImp*/,
+                                               false /*IsKill*/,
+                                               false /*IsDead*/));
+      AllDead = false;
+    }
+  }
+
+  if (AllDead)
+    MO.setIsKill(true);
+  return false;
+}
+
+/// FixupKills - Fix the register kill flags; they may have been made
+/// incorrect by instruction reordering.
+///
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+  std::set<unsigned> killedRegs;
+  BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+  StartBlockForKills(MBB);
+
+  // Examine block from end to start...
+  unsigned Count = MBB->size();
+  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+    if (MI->isDebugValue())
+      continue;
+
+    // Update liveness. Registers that are def'd but not used in this
+    // instruction are now dead. Mark the register and all of its subregs as
+    // no longer live, since the def completely clobbers them.
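+    // (Walking bottom-up, a def means the register is not live above this
+    // instruction, so its KillIndices entry reverts to ~0u below.)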
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + KillIndices[Reg] = ~0u; + + // Repeat for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = ~0u; + } + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. + killedRegs.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + bool kill = false; + if (killedRegs.find(Reg) == killedRegs.end()) { + kill = true; + // A register is not killed if any subregs are live... + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + if (KillIndices[*Subreg] != ~0u) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = (KillIndices[Reg] == ~0u); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: ToggleKillFlag may invalidate MO. + ToggleKillFlag(MI, MO); + DEBUG(MI->dump()); + } + + killedRegs.insert(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + KillIndices[Reg] = Count; + + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = Count; + } + } + } +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + // Compute how many cycles it will be before this actually becomes + // available. This is the max of the start time of all predecessors plus + // their latencies. + SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. +void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. 
If a successor pending count is zero, add it to +/// the Available queue. +void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && + "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue.ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void SchedulePostRATDList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // We're scheduling top-down but we're visiting the regions in + // bottom-up order, so we don't know the hazards at the start of a + // region. So assume no hazards (this should usually be ok as most + // blocks are a single region). + HazardRec->Reset(); + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // Add all leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + bool available = SUnits[i].Preds.empty(); + if (available) { + AvailableQueue.push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // In any cycle where we can't schedule any instructions, we must + // stall or emit a noop, depending on the target. + bool CycleHasInsts = false; + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + unsigned MinDepth = ~0u; + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() <= CurCycle) { + AvailableQueue.push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); + } + + DEBUG(dbgs() << "\n*** Examining Available\n"; + LatencyPriorityQueue q = AvailableQueue; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(this); + }); + + SUnit *FoundSUnit = 0; + bool HasNoopHazards = false; + while (!AvailableQueue.empty()) { + SUnit *CurSUnit = AvailableQueue.pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue.push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule... + if (FoundSUnit) { + // ... schedule the node... + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + CycleHasInsts = true; + } else { + if (CycleHasInsts) { + DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, + // just advance the current cycle and try again. 
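+        //
+        // Illustration only (a minimal standalone sketch, not this
+        // scheduler's code; EmptySlotKind and fillEmptySlot are
+        // hypothetical names): when no instruction can issue, an
+        // interlocked pipeline simply burns the cycle, while a machine
+        // without interlocks must pad the sequence so nothing faults.
+        //
+        //   #include <vector>
+        //   enum EmptySlotKind { InterlockedStall, NeedsNoop };
+        //
+        //   void fillEmptySlot(EmptySlotKind Kind, unsigned &Cycle,
+        //                      std::vector<const char*> &Sequence) {
+        //     if (Kind == NeedsNoop)
+        //       Sequence.push_back("nop"); // like the NULL SUnit below
+        //     ++Cycle;                     // either way, time advances
+        //   }
+        //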
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++NumStalls; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + } + + ++CurCycle; + CycleHasInsts = false; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) { + return new PostRAScheduler(OptLevel); +} diff --git a/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp b/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp new file mode 100644 index 0000000..cd9d83e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp @@ -0,0 +1,1423 @@ +//===-- PreAllocSplitting.cpp - Pre-allocation Interval Spltting Pass. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level pre-register allocation +// live interval splitting pass. It finds live interval barriers, i.e. +// instructions which will kill all physical registers in certain register +// classes, and split all live intervals which cross the barrier. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;

+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
+                                   cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
+                                     cl::Hidden);
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+  class PreAllocSplitting : public MachineFunctionPass {
+    MachineFunction *CurrMF;
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo* TRI;
+    MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    SlotIndexes *SIs;
+    LiveIntervals *LIs;
+    LiveStacks *LSs;
+    VirtRegMap *VRM;
+
+    // Barrier - Current barrier being processed.
+    MachineInstr *Barrier;
+
+    // BarrierMBB - Basic block where the barrier resides.
+    MachineBasicBlock *BarrierMBB;
+
+    // BarrierIdx - Current barrier index.
+    SlotIndex BarrierIdx;
+
+    // CurrLI - Current live interval being split.
+    LiveInterval *CurrLI;
+
+    // CurrSLI - Current stack slot live interval.
+    LiveInterval *CurrSLI;
+
+    // CurrSValNo - Current val# for the stack slot live interval.
+    VNInfo *CurrSValNo;
+
+    // IntervalSSMap - A map from live interval to spill slots.
+    DenseMap<unsigned, int> IntervalSSMap;
+
+    // Def2SpillMap - A map from a def instruction index to spill index.
+    DenseMap<SlotIndex, SlotIndex> Def2SpillMap;
+
+  public:
+    static char ID;
+    PreAllocSplitting()
+      : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<SlotIndexes>();
+      AU.addPreserved<SlotIndexes>();
+      AU.addRequired<LiveIntervals>();
+      AU.addPreserved<LiveIntervals>();
+      AU.addRequired<LiveStacks>();
+      AU.addPreserved<LiveStacks>();
+      AU.addPreserved<RegisterCoalescer>();
+      AU.addPreserved<CalculateSpillWeights>();
+      if (StrongPHIElim)
+        AU.addPreservedID(StrongPHIEliminationID);
+      else
+        AU.addPreservedID(PHIEliminationID);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<VirtRegMap>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      IntervalSSMap.clear();
+      Def2SpillMap.clear();
+    }
+
+    virtual const char *getPassName() const {
+      return "Pre-Register Allocation Live Interval Splitting";
+    }
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* M = 0) const {
+      LIs->print(O, M);
+    }
+
+  private:
+
+    MachineBasicBlock::iterator
+      findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
+                     SmallPtrSet<MachineInstr*, 4>&);
+
+    MachineBasicBlock::iterator
+      findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
+                       SmallPtrSet<MachineInstr*, 4>&);
+
+    int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
+
+    bool IsAvailableInStack(MachineBasicBlock*, unsigned,
+                            SlotIndex, SlotIndex,
+                            SlotIndex&, int&) const;
+
+    void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex);
+
+    bool SplitRegLiveInterval(LiveInterval*);
+
+    bool SplitRegLiveIntervals(const TargetRegisterClass **,
+                               SmallPtrSet<LiveInterval*, 8>&);
+
+    bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
+                        MachineBasicBlock* BarrierMBB);
+    bool Rematerialize(unsigned vreg, VNInfo* ValNo,
+                       MachineInstr* DefMI,
+                       MachineBasicBlock::iterator RestorePt,
+                       SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
+                            MachineInstr* DefMI,
+                            MachineInstr* Barrier,
+                            MachineBasicBlock* MBB,
+                            int& SS,
+                            SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    MachineInstr* FoldRestore(unsigned vreg,
+                              const TargetRegisterClass* RC,
+                              MachineInstr* Barrier,
+                              MachineBasicBlock* MBB,
+                              int SS,
+                              SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    void RenumberValno(VNInfo* VN);
+    void ReconstructLiveInterval(LiveInterval* LI);
+    bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
+    unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
+                                  unsigned Reg, int FrameIndex, bool& TwoAddr);
+    VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
+                                   MachineBasicBlock* MBB, LiveInterval* LI,
+                                   SmallPtrSet<MachineInstr*, 4>& Visited,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                   DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                   DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                   DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                   bool IsTopLevel, bool IsIntraBlock);
+    VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
+                                   MachineBasicBlock* MBB, LiveInterval* LI,
+                                   SmallPtrSet<MachineInstr*, 4>& Visited,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+
DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, + DenseMap<MachineInstr*, VNInfo*>& NewVNs, + DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, + DenseMap<MachineBasicBlock*, VNInfo*>& Phis, + bool IsTopLevel, bool IsIntraBlock); +}; +} // end anonymous namespace + +char PreAllocSplitting::ID = 0; + +INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting", + "Pre-Register Allocation Live Interval Splitting", + false, false); + +char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; + +/// findSpillPoint - Find a gap as far away from the given MI that's suitable +/// for spilling the current live interval. The index must be before any +/// defs and uses of the live interval register in the mbb. Return begin() if +/// none is found. +MachineBasicBlock::iterator +PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, + MachineInstr *DefMI, + SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { + MachineBasicBlock::iterator Pt = MBB->begin(); + + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock::iterator EndPt = DefMI + ? MachineBasicBlock::iterator(DefMI) : MBB->begin(); + + while (MII != EndPt && !RefsInMBB.count(MII) && + MII->getOpcode() != TRI->getCallFrameSetupOpcode()) + --MII; + if (MII == EndPt || RefsInMBB.count(MII)) return Pt; + + while (MII != EndPt && !RefsInMBB.count(MII)) { + // We can't insert the spill between the barrier (a call), and its + // corresponding call frame setup. + if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { + while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) { + --MII; + if (MII == EndPt) { + return Pt; + } + } + continue; + } else { + Pt = MII; + } + + if (RefsInMBB.count(MII)) + return Pt; + + + --MII; + } + + return Pt; +} + +/// findRestorePoint - Find a gap in the instruction index map that's suitable +/// for restoring the current live interval value. The index must be before any +/// uses of the live interval register in the mbb. Return end() if none is +/// found. +MachineBasicBlock::iterator +PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, + SlotIndex LastIdx, + SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { + // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb + // begin index accordingly. + MachineBasicBlock::iterator Pt = MBB->end(); + MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator(); + + // We start at the call, so walk forward until we find the call frame teardown + // since we can't insert restores before that. Bail if we encounter a use + // during this time. + MachineBasicBlock::iterator MII = MI; + if (MII == EndPt) return Pt; + + while (MII != EndPt && !RefsInMBB.count(MII) && + MII->getOpcode() != TRI->getCallFrameDestroyOpcode()) + ++MII; + if (MII == EndPt || RefsInMBB.count(MII)) return Pt; + ++MII; + + // FIXME: Limit the number of instructions to examine to reduce + // compile time? + while (MII != EndPt) { + SlotIndex Index = LIs->getInstructionIndex(MII); + if (Index > LastIdx) + break; + + // We can't insert a restore between the barrier (a call) and its + // corresponding call frame teardown. + if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) { + do { + if (MII == EndPt || RefsInMBB.count(MII)) return Pt; + ++MII; + } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); + } else { + Pt = MII; + } + + if (RefsInMBB.count(MII)) + return Pt; + + ++MII; + } + + return Pt; +} + +/// CreateSpillStackSlot - Create a stack slot for the live interval being +/// split. 
+/// If the live interval was previously split, just reuse the same
+/// slot.
+int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
+                                            const TargetRegisterClass *RC) {
+  int SS;
+  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+  if (I != IntervalSSMap.end()) {
+    SS = I->second;
+  } else {
+    SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
+    IntervalSSMap[Reg] = SS;
+  }
+
+  // Create live interval for stack slot.
+  CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+  if (CurrSLI->hasAtLeastOneValue())
+    CurrSValNo = CurrSLI->getValNumInfo(0);
+  else
+    CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+                                       LSs->getVNInfoAllocator());
+  return SS;
+}
+
+/// IsAvailableInStack - Return true if register is available in a split stack
+/// slot at the specified index.
+bool
+PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
+                                      unsigned Reg, SlotIndex DefIndex,
+                                      SlotIndex RestoreIndex,
+                                      SlotIndex &SpillIndex,
+                                      int& SS) const {
+  if (!DefMBB)
+    return false;
+
+  DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
+  if (I == IntervalSSMap.end())
+    return false;
+  DenseMap<SlotIndex, SlotIndex>::const_iterator
+    II = Def2SpillMap.find(DefIndex);
+  if (II == Def2SpillMap.end())
+    return false;
+
+  // If last spill of def is in the same mbb as barrier mbb (where restore will
+  // be), make sure it's not below the intended restore index.
+  // FIXME: Undo the previous spill?
+  assert(LIs->getMBBFromIndex(II->second) == DefMBB);
+  if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
+    return false;
+
+  SS = I->second;
+  SpillIndex = II->second;
+  return true;
+}
+
+/// UpdateSpillSlotInterval - Given the specified val# of the register live
+/// interval being split, and the spill and restore indices, update the live
+/// interval of the spill stack slot.
+void
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex,
+                                           SlotIndex RestoreIndex) {
+  assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
+         "Expect restore in the barrier mbb");
+
+  MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
+  if (MBB == BarrierMBB) {
+    // Intra-block spill + restore. We are done.
+    LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
+    CurrSLI->addRange(SLR);
+    return;
+  }
+
+  SmallPtrSet<MachineBasicBlock*, 4> Processed;
+  SlotIndex EndIdx = LIs->getMBBEndIdx(MBB);
+  LiveRange SLR(SpillIndex, EndIdx, CurrSValNo);
+  CurrSLI->addRange(SLR);
+  Processed.insert(MBB);
+
+  // Start from the spill mbb, figure out the extent of the spill slot's
+  // live interval.
+  SmallVector<MachineBasicBlock*, 4> WorkList;
+  const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
+  if (LR->end > EndIdx)
+    // If the live range extends beyond the end of the mbb, add successors to
+    // the work list.
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+           SE = MBB->succ_end(); SI != SE; ++SI)
+      WorkList.push_back(*SI);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *MBB = WorkList.back();
+    WorkList.pop_back();
+    if (Processed.count(MBB))
+      continue;
+    SlotIndex Idx = LIs->getMBBStartIdx(MBB);
+    LR = CurrLI->getLiveRangeContaining(Idx);
+    if (LR && LR->valno == ValNo) {
+      EndIdx = LIs->getMBBEndIdx(MBB);
+      if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
+        // Spill slot live interval stops at the restore.
+        LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
+        CurrSLI->addRange(SLR);
+      } else if (LR->end > EndIdx) {
+        // Live range extends beyond end of mbb, process successors.
+        LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo);
+        CurrSLI->addRange(SLR);
+        for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+               SE = MBB->succ_end(); SI != SE; ++SI)
+          WorkList.push_back(*SI);
+      } else {
+        LiveRange SLR(Idx, LR->end, CurrSValNo);
+        CurrSLI->addRange(SLR);
+      }
+      Processed.insert(MBB);
+    }
+  }
+}
+
+/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
+/// construction algorithm to compute the ranges and valnos for an interval.
+VNInfo*
+PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
+                                          MachineBasicBlock* MBB,
+                                          LiveInterval* LI,
+                                          SmallPtrSet<MachineInstr*, 4>& Visited,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                          DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                          DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                          DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                          bool IsTopLevel, bool IsIntraBlock) {
+  // Return memoized result if it's available.
+  if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
+    return NewVNs[UseI];
+  else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
+    return NewVNs[UseI];
+  else if (!IsIntraBlock && LiveOut.count(MBB))
+    return LiveOut[MBB];
+
+  // Check if our block contains any uses or defs.
+  bool ContainsDefs = Defs.count(MBB);
+  bool ContainsUses = Uses.count(MBB);
+
+  VNInfo* RetVNI = 0;
+
+  // Enumerate the cases of use/def containing blocks.
+  if (!ContainsDefs && !ContainsUses) {
+    return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+                                          NewVNs, LiveOut, Phis,
+                                          IsTopLevel, IsIntraBlock);
+  } else if (ContainsDefs && !ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+
+    // Search for the def in this block.  If we don't find it before the
+    // instruction we care about, go to the fallback case.  Note that that
+    // should never happen: this cannot be intrablock, so use should
+    // always be an end() iterator.
+    assert(UseI == MBB->end() && "No use marked in intrablock");
+
+    MachineBasicBlock::iterator Walker = UseI;
+    --Walker;
+    while (Walker != MBB->begin()) {
+      if (BlockDefs.count(Walker))
+        break;
+      --Walker;
+    }
+
+    // Once we've found it, extend its VNInfo to our instruction.
+    SlotIndex DefIndex = LIs->getInstructionIndex(Walker);
+    DefIndex = DefIndex.getDefIndex();
+    SlotIndex EndIndex = LIs->getMBBEndIdx(MBB);
+
+    RetVNI = NewVNs[Walker];
+    LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI));
+  } else if (!ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // Search for the use in this block that precedes the instruction we care
+    // about, going to the fallback case if we don't find it.
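+    //
+    // The walk has this generic shape (illustrative sketch only; the
+    // template and the name findNearestAbove are hypothetical, not LLVM
+    // API):
+    //
+    //   // Scan from just above From toward Begin for the nearest element
+    //   // satisfying Interesting; report failure so the caller can fall
+    //   // back to cross-block PHI construction.
+    //   template <typename Iter, typename Pred>
+    //   bool findNearestAbove(Iter Begin, Iter From, Pred Interesting,
+    //                         Iter &Found) {
+    //     for (Iter Walker = From; Walker != Begin; ) {
+    //       --Walker;
+    //       if (Interesting(Walker)) { Found = Walker; return true; }
+    //     }
+    //     return false;
+    //   }
+    //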
+    MachineBasicBlock::iterator Walker = UseI;
+    bool found = false;
+    while (Walker != MBB->begin()) {
+      --Walker;
+      if (BlockUses.count(Walker)) {
+        found = true;
+        break;
+      }
+    }
+
+    if (!found)
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
+    UseIndex = UseIndex.getUseIndex();
+    SlotIndex EndIndex;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    // Now, recursively phi construct the VNInfo for the use we found,
+    // and then extend it to include the instruction we care about.
+    RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                    NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
+
+    // FIXME: Need to set kills properly for inter-block stuff.
+  } else if (ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // This case is basically a merging of the two preceding cases, with the
+    // special note that checking for defs must take precedence over checking
+    // for uses, because of two-address instructions.
+    MachineBasicBlock::iterator Walker = UseI;
+    bool foundDef = false;
+    bool foundUse = false;
+    while (Walker != MBB->begin()) {
+      --Walker;
+      if (BlockDefs.count(Walker)) {
+        foundDef = true;
+        break;
+      } else if (BlockUses.count(Walker)) {
+        foundUse = true;
+        break;
+      }
+    }
+
+    if (!foundDef && !foundUse)
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
+    StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
+    SlotIndex EndIndex;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    if (foundDef)
+      RetVNI = NewVNs[Walker];
+    else
+      RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                      NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
+  }
+
+  // Memoize results so we don't have to recompute them.
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+  else {
+    if (!NewVNs.count(UseI))
+      NewVNs[UseI] = RetVNI;
+    Visited.insert(UseI);
+  }
+
+  return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
+                                       MachineBasicBlock* MBB, LiveInterval* LI,
+                                       SmallPtrSet<MachineInstr*, 4>& Visited,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+        DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                       DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                       DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                       bool IsTopLevel, bool IsIntraBlock) {
+  // NOTE: Because this is the fallback case from other cases, we do NOT
+  // assume that we are not intrablock here.
+  if (Phis.count(MBB)) return Phis[MBB];
+
+  SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
+  VNInfo *RetVNI = Phis[MBB] =
+    LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false,
+                     LIs->getVNInfoAllocator());
+
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+  // If there are no uses or defs between our starting point and the
+  // beginning of the block, then recursively perform phi construction
+  // on our predecessors.
+  DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
+                                              Visited, Defs, Uses, NewVNs,
+                                              LiveOut, Phis, false, false);
+    if (Incoming != 0)
+      IncomingVNs[*PI] = Incoming;
+  }
+
+  if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
+    VNInfo* OldVN = RetVNI;
+    VNInfo* NewVN = IncomingVNs.begin()->second;
+    VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
+    if (MergedVN == OldVN) std::swap(OldVN, NewVN);
+
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
+           LOE = LiveOut.end(); LOI != LOE; ++LOI)
+      if (LOI->second == OldVN)
+        LOI->second = MergedVN;
+    for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
+           NVE = NewVNs.end(); NVI != NVE; ++NVI)
+      if (NVI->second == OldVN)
+        NVI->second = MergedVN;
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
+           PE = Phis.end(); PI != PE; ++PI)
+      if (PI->second == OldVN)
+        PI->second = MergedVN;
+    RetVNI = MergedVN;
+  } else {
+    // Otherwise, merge the incoming VNInfos with a phi join.  Create a new
+    // VNInfo to represent the joined value.
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
+           IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
+      I->second->setHasPHIKill(true);
+    }
+  }
+
+  SlotIndex EndIndex;
+  if (IsIntraBlock) {
+    EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+  } else
+    EndIndex = LIs->getMBBEndIdx(MBB);
+  LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
+
+  // Memoize results so we don't have to recompute them.
+  if (!IsIntraBlock)
+    LiveOut[MBB] = RetVNI;
+  else {
+    if (!NewVNs.count(UseI))
+      NewVNs[UseI] = RetVNI;
+    Visited.insert(UseI);
+  }
+
+  return RetVNI;
+}
+
+/// ReconstructLiveInterval - Recompute a live interval from scratch.
+void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
+  VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator();
+
+  // Clear the old ranges and valnos.
+  LI->clear();
+
+  // Cache the uses and defs of the register.
+  typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
+  RegMap Defs, Uses;
+
+  // Keep track of the new VNs we're creating.
+  DenseMap<MachineInstr*, VNInfo*> NewVNs;
+  SmallPtrSet<VNInfo*, 2> PhiVNs;
+
+  // Cache defs, and create a new VNInfo for each def.
+  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+         DE = MRI->def_end(); DI != DE; ++DI) {
+    Defs[(*DI).getParent()].insert(&*DI);
+
+    SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+    DefIdx = DefIdx.getDefIndex();
+
+    assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
+    VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
+
+    // If the def is a move, set the copy field.
+    if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
+      NewVN->setCopy(&*DI);
+
+    NewVNs[&*DI] = NewVN;
+  }
+
+  // Cache uses as a separate pass from actually processing them.
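+  //
+  // Both caches are just "bucket the register's references by parent
+  // block". A standalone sketch of the idea (illustrative only; Block, Ref
+  // and bucketByBlock are hypothetical names):
+  //
+  //   #include <map>
+  //   #include <vector>
+  //   struct Block; struct Ref { Block *Parent; };
+  //
+  //   std::map<Block*, std::vector<Ref*> >
+  //   bucketByBlock(const std::vector<Ref*> &Refs) {
+  //     std::map<Block*, std::vector<Ref*> > Buckets;
+  //     for (size_t i = 0; i != Refs.size(); ++i)
+  //       Buckets[Refs[i]->Parent].push_back(Refs[i]);
+  //     return Buckets;
+  //   }
+  //
+  // Keeping the caching pass separate means PerformPHIConstruction can
+  // query per-block def/use sets cheaply while it recurses over the CFG.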
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), + UE = MRI->use_end(); UI != UE; ++UI) + Uses[(*UI).getParent()].insert(&*UI); + + // Now, actually process every use and use a phi construction algorithm + // to walk from it to its reaching definitions, building VNInfos along + // the way. + DenseMap<MachineBasicBlock*, VNInfo*> LiveOut; + DenseMap<MachineBasicBlock*, VNInfo*> Phis; + SmallPtrSet<MachineInstr*, 4> Visited; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), + UE = MRI->use_end(); UI != UE; ++UI) { + PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, true, true); + } + + // Add ranges for dead defs + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = DefIdx.getDefIndex(); + + if (LI->liveAt(DefIdx)) continue; + + VNInfo* DeadVN = NewVNs[&*DI]; + LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); + } +} + +/// RenumberValno - Split the given valno out into a new vreg, allowing it to +/// be allocated to a different register. This function creates a new vreg, +/// copies the valno and its live ranges over to the new vreg's interval, +/// removes them from the old interval, and rewrites all uses and defs of +/// the original reg to the new vreg within those ranges. +void PreAllocSplitting::RenumberValno(VNInfo* VN) { + SmallVector<VNInfo*, 4> Stack; + SmallVector<VNInfo*, 4> VNsToCopy; + Stack.push_back(VN); + + // Walk through and copy the valno we care about, and any other valnos + // that are two-address redefinitions of the one we care about. These + // will need to be rewritten as well. We also check for safety of the + // renumbering here, by making sure that none of the valno involved has + // phi kills. + while (!Stack.empty()) { + VNInfo* OldVN = Stack.back(); + Stack.pop_back(); + + // Bail out if we ever encounter a valno that has a PHI kill. We can't + // renumber these. + if (OldVN->hasPHIKill()) return; + + VNsToCopy.push_back(OldVN); + + // Locate two-address redefinitions + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; + SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); + VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); + if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != + VNsToCopy.end()) + Stack.push_back(NextVN); + } + } + + // Create the new vreg + unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg)); + + // Create the new live interval + LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg); + + for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE = + VNsToCopy.end(); OI != OE; ++OI) { + VNInfo* OldVN = *OI; + + // Copy the valno over + VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator()); + NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN); + + // Remove the valno from the old interval + CurrLI->removeValNo(OldVN); + } + + // Rewrite defs and uses. This is done in two stages to avoid invalidating + // the reg_iterator. 
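+  //
+  // The pattern here, in isolation (an illustrative sketch, not this
+  // pass's code): never mutate a structure through the iterator you are
+  // walking when the mutation can re-link the structure. Record stable
+  // handles first, then mutate.
+  //
+  //   #include <list>
+  //   #include <vector>
+  //
+  //   void zeroEvens(std::list<int> &L) {
+  //     std::vector<int*> Handles;        // pointers stay valid in a list
+  //     for (std::list<int>::iterator I = L.begin(); I != L.end(); ++I)
+  //       if (*I % 2 == 0)
+  //         Handles.push_back(&*I);
+  //     for (size_t i = 0; i != Handles.size(); ++i)
+  //       *Handles[i] = 0;                // the walk is over; safe to mutate
+  //   }
+  //
+  // Below, the stable handles are (MachineInstr*, operand index) pairs,
+  // which remain valid even though setReg() moves each operand from the
+  // old register's use-list onto the new one.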
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange; + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), + E = MRI->reg_end(); I != E; ++I) { + MachineOperand& MO = I.getOperand(); + SlotIndex InstrIdx = LIs->getInstructionIndex(&*I); + + if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) || + (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex()))) + OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); + } + + for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I = + OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) { + MachineInstr* Inst = I->first; + unsigned OpIdx = I->second; + MachineOperand& MO = Inst->getOperand(OpIdx); + MO.setReg(NewVReg); + } + + // Grow the VirtRegMap, since we've created a new vreg. + VRM->grow(); + + // The renumbered vreg shares a stack slot with the old register. + if (IntervalSSMap.count(CurrLI->reg)) + IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; + + ++NumRenumbers; +} + +bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, + MachineInstr* DefMI, + MachineBasicBlock::iterator RestorePt, + SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { + MachineBasicBlock& MBB = *RestorePt->getParent(); + + MachineBasicBlock::iterator KillPt = BarrierMBB->end(); + if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) + KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); + else + KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); + + if (KillPt == DefMI->getParent()->end()) + return false; + + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); + SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); + + ReconstructLiveInterval(CurrLI); + RematIdx = RematIdx.getDefIndex(); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); + + ++NumSplits; + ++NumRemats; + return true; +} + +MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, + const TargetRegisterClass* RC, + MachineInstr* DefMI, + MachineInstr* Barrier, + MachineBasicBlock* MBB, + int& SS, + SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { + // Go top down if RefsInMBB is empty. + if (RefsInMBB.empty()) + return 0; + + MachineBasicBlock::iterator FoldPt = Barrier; + while (&*FoldPt != DefMI && FoldPt != MBB->begin() && + !RefsInMBB.count(FoldPt)) + --FoldPt; + + int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg); + if (OpIdx == -1) + return 0; + + SmallVector<unsigned, 1> Ops; + Ops.push_back(OpIdx); + + if (!TII->canFoldMemoryOperand(FoldPt, Ops)) + return 0; + + DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg); + if (I != IntervalSSMap.end()) { + SS = I->second; + } else { + SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); + } + + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); + + if (FMI) { + LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); + FoldPt->eraseFromParent(); + ++NumFolds; + + IntervalSSMap[vreg] = SS; + CurrSLI = &LSs->getOrCreateInterval(SS, RC); + if (CurrSLI->hasAtLeastOneValue()) + CurrSValNo = CurrSLI->getValNumInfo(0); + else + CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false, + LSs->getVNInfoAllocator()); + } + + return FMI; +} + +MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, + const TargetRegisterClass* RC, + MachineInstr* Barrier, + MachineBasicBlock* MBB, + int SS, + SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { + if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds) + return 0; + + // Go top down if RefsInMBB is empty. 
+  if (RefsInMBB.empty())
+    return 0;
+
+  // Can't fold a restore between a call stack setup and teardown.
+  MachineBasicBlock::iterator FoldPt = Barrier;
+
+  // Advance from barrier to call frame teardown.
+  while (FoldPt != MBB->getFirstTerminator() &&
+         FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+    if (RefsInMBB.count(FoldPt))
+      return 0;
+
+    ++FoldPt;
+  }
+
+  if (FoldPt == MBB->getFirstTerminator())
+    return 0;
+  else
+    ++FoldPt;
+
+  // Now find the restore point.
+  while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
+    if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+      while (FoldPt != MBB->getFirstTerminator() &&
+             FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+        if (RefsInMBB.count(FoldPt))
+          return 0;
+
+        ++FoldPt;
+      }
+
+      if (FoldPt == MBB->getFirstTerminator())
+        return 0;
+    }
+
+    ++FoldPt;
+  }
+
+  if (FoldPt == MBB->getFirstTerminator())
+    return 0;
+
+  int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
+  if (OpIdx == -1)
+    return 0;
+
+  SmallVector<unsigned, 1> Ops;
+  Ops.push_back(OpIdx);
+
+  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+    return 0;
+
+  MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
+
+  if (FMI) {
+    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+    FoldPt->eraseFromParent();
+    ++NumRestoreFolds;
+  }
+
+  return FMI;
+}
+
+/// SplitRegLiveInterval - Split (spill and restore) the given live interval
+/// so it would not cross the barrier that's being processed. Shrink wrap
+/// (minimize) the live interval to the last uses.
+bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+  DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
+               << "  result: ");
+
+  CurrLI = LI;
+
+  // Find the live range where the current interval crosses the barrier.
+  LiveInterval::iterator LR =
+    CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex());
+  VNInfo *ValNo = LR->valno;
+
+  assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
+
+  MachineInstr *DefMI = ValNo->isDefAccurate()
+    ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+
+  // If this would create a new join point, do not split.
+  if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
+    DEBUG(dbgs() << "FAILED (would create a new join point).\n");
+    return false;
+  }
+
+  // Find all references in the barrier mbb.
+  SmallPtrSet<MachineInstr*, 4> RefsInMBB;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineInstr *RefMI = &*I;
+    if (RefMI->getParent() == BarrierMBB)
+      RefsInMBB.insert(RefMI);
+  }
+
+  // Find a point to restore the value after the barrier.
+  MachineBasicBlock::iterator RestorePt =
+    findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
+  if (RestorePt == BarrierMBB->end()) {
+    DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
+    return false;
+  }
+
+  if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
+    if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
+      DEBUG(dbgs() << "success (remat).\n");
+      return true;
+    }
+
+  // Add a spill either before the barrier or after the definition.
+  MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
+  const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
+  SlotIndex SpillIndex;
+  MachineInstr *SpillMI = NULL;
+  int SS = -1;
+  if (!ValNo->isDefAccurate()) {
+    // If we don't know where the def is we must split just before the barrier.
+ if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, + BarrierMBB, SS, RefsInMBB))) { + SpillIndex = LIs->getInstructionIndex(SpillMI); + } else { + MachineBasicBlock::iterator SpillPt = + findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); + if (SpillPt == BarrierMBB->begin()) { + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); + return false; // No gap to insert spill. + } + // Add spill. + + SS = CreateSpillStackSlot(CurrLI->reg, RC); + TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC, + TRI); + SpillMI = prior(SpillPt); + SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); + } + } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, + LIs->getZeroIndex(), SpillIndex, SS)) { + // If it's already split, just restore the value. There is no need to spill + // the def again. + if (!DefMI) { + DEBUG(dbgs() << "FAILED (def is dead).\n"); + return false; // Def is dead. Do nothing. + } + + if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, + BarrierMBB, SS, RefsInMBB))) { + SpillIndex = LIs->getInstructionIndex(SpillMI); + } else { + // Check if it's possible to insert a spill after the def MI. + MachineBasicBlock::iterator SpillPt; + if (DefMBB == BarrierMBB) { + // Add spill after the def and the last use before the barrier. + SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, + RefsInMBB); + if (SpillPt == DefMBB->begin()) { + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); + return false; // No gap to insert spill. + } + } else { + SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); + if (SpillPt == DefMBB->end()) { + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); + return false; // No gap to insert spill. + } + } + // Add spill. + SS = CreateSpillStackSlot(CurrLI->reg, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC, + TRI); + SpillMI = prior(SpillPt); + SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); + } + } + + // Remember def instruction index to spill index mapping. + if (DefMI && SpillMI) + Def2SpillMap[ValNo->def] = SpillIndex; + + // Add restore. + bool FoldedRestore = false; + SlotIndex RestoreIndex; + if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, + BarrierMBB, SS, RefsInMBB)) { + RestorePt = LMI; + RestoreIndex = LIs->getInstructionIndex(RestorePt); + FoldedRestore = true; + } else { + TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI); + MachineInstr *LoadMI = prior(RestorePt); + RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); + } + + // Update spill stack slot live interval. + UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(), + RestoreIndex.getDefIndex()); + + ReconstructLiveInterval(CurrLI); + + if (!FoldedRestore) { + SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); + RestoreIdx = RestoreIdx.getDefIndex(); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); + } + + ++NumSplits; + DEBUG(dbgs() << "success.\n"); + return true; +} + +/// SplitRegLiveIntervals - Split all register live intervals that cross the +/// barrier that's being processed. +bool +PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, + SmallPtrSet<LiveInterval*, 8>& Split) { + // First find all the virtual registers whose live intervals are intercepted + // by the current barrier. 
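+  //
+  // "Intercepted" reduces to a simple test over half-open index ranges:
+  // the interval is live across the barrier's index, and the barrier does
+  // not itself read the register (a reader needs the old value in the
+  // original register). A standalone sketch (illustrative only; Range,
+  // liveAt and intercepted are hypothetical names):
+  //
+  //   #include <vector>
+  //   struct Range { unsigned Start, End; };   // [Start, End)
+  //
+  //   bool liveAt(const std::vector<Range> &LI, unsigned Idx) {
+  //     for (size_t i = 0; i != LI.size(); ++i)
+  //       if (LI[i].Start <= Idx && Idx < LI[i].End)
+  //         return true;
+  //     return false;
+  //   }
+  //
+  //   bool intercepted(const std::vector<Range> &LI, unsigned BarrierIdx,
+  //                    bool BarrierReadsReg) {
+  //     return liveAt(LI, BarrierIdx) && !BarrierReadsReg;
+  //   }
+  //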
+ SmallVector<LiveInterval*, 8> Intervals; + for (const TargetRegisterClass **RC = RCs; *RC; ++RC) { + // FIXME: If it's not safe to move any instruction that defines the barrier + // register class, then it means there are some special dependencies which + // codegen is not modelling. Ignore these barriers for now. + if (!TII->isSafeToMoveRegClassDefs(*RC)) + continue; + const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); + for (unsigned i = 0, e = VRs.size(); i != e; ++i) { + unsigned Reg = VRs[i]; + if (!LIs->hasInterval(Reg)) + continue; + LiveInterval *LI = &LIs->getInterval(Reg); + if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg)) + // Virtual register live interval is intercepted by the barrier. We + // should split and shrink wrap its interval if possible. + Intervals.push_back(LI); + } + } + + // Process the affected live intervals. + bool Change = false; + while (!Intervals.empty()) { + if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) + break; + LiveInterval *LI = Intervals.back(); + Intervals.pop_back(); + bool result = SplitRegLiveInterval(LI); + if (result) Split.insert(LI); + Change |= result; + } + + return Change; +} + +unsigned PreAllocSplitting::getNumberOfNonSpills( + SmallPtrSet<MachineInstr*, 4>& MIs, + unsigned Reg, int FrameIndex, + bool& FeedsTwoAddr) { + unsigned NonSpills = 0; + for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end(); + UI != UE; ++UI) { + int StoreFrameIndex; + unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); + if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) + ++NonSpills; + + int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) + FeedsTwoAddr = true; + } + + return NonSpills; +} + +/// removeDeadSpills - After doing splitting, filter through all intervals we've +/// split, and see if any of the spills are unnecessary. If so, remove them. +bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { + bool changed = false; + + // Walk over all of the live intervals that were touched by the splitter, + // and see if we can do any DCE and/or folding. + for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(), + LE = split.end(); LI != LE; ++LI) { + DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount; + + // First, collect all the uses of the vreg, and sort them by their + // reaching definition (VNInfo). + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), + UE = MRI->use_end(); UI != UE; ++UI) { + SlotIndex index = LIs->getInstructionIndex(&*UI); + index = index.getUseIndex(); + + const LiveRange* LR = (*LI)->getLiveRangeContaining(index); + VNUseCount[LR->valno].insert(&*UI); + } + + // Now, take the definitions (VNInfo's) one at a time and try to DCE + // and/or fold them away. + for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(), + VE = (*LI)->vni_end(); VI != VE; ++VI) { + + if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit) + return changed; + + VNInfo* CurrVN = *VI; + + // We don't currently try to handle definitions with PHI kills, because + // it would involve processing more than one VNInfo at once. + if (CurrVN->hasPHIKill()) continue; + + // We also don't try to handle the results of PHI joins, since there's + // no defining instruction to analyze. 
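+      //
+      // Taken together, the eligibility filter looks like this (sketch
+      // only; ValNoInfo and isDeadSpillCandidate are hypothetical names):
+      //
+      //   struct ValNoInfo { bool HasPHIKill, DefAccurate, Unused; };
+      //
+      //   bool isDeadSpillCandidate(const ValNoInfo &VN) {
+      //     if (VN.HasPHIKill)   return false; // needs multi-valno reasoning
+      //     if (!VN.DefAccurate) return false; // PHI join: no def instruction
+      //     if (VN.Unused)       return false; // nothing to clean up
+      //     return true;  // caller still requires the def to be a reload
+      //   }
+      //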
+      if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
+
+      // We're only interested in eliminating cruft introduced by the splitter,
+      // which is of the form load-use or load-use-store.  First, check that
+      // the definition is a load, and remember what stack slot we loaded it
+      // from.
+      MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
+      int FrameIndex;
+      if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
+
+      // If the definition has no uses at all, just DCE it.
+      if (VNUseCount[CurrVN].size() == 0) {
+        LIs->RemoveMachineInstrFromMaps(DefMI);
+        (*LI)->removeValNo(CurrVN);
+        DefMI->eraseFromParent();
+        VNUseCount.erase(CurrVN);
+        ++NumDeadSpills;
+        changed = true;
+        continue;
+      }
+
+      // Second, get the number of non-store uses of the definition, as well as
+      // a flag indicating whether it feeds into a later two-address definition.
+      bool FeedsTwoAddr = false;
+      unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
+                                                    (*LI)->reg, FrameIndex,
+                                                    FeedsTwoAddr);
+
+      // If there's one non-store use and it doesn't feed a two-addr, then
+      // this is a load-use-store case that we can try to fold.
+      if (NonSpillCount == 1 && !FeedsTwoAddr) {
+        // Start by finding the non-store use MachineInstr.
+        SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
+        int StoreFrameIndex;
+        unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+        while (UI != VNUseCount[CurrVN].end() &&
+               (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
+          ++UI;
+          if (UI != VNUseCount[CurrVN].end())
+            StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+        }
+        if (UI == VNUseCount[CurrVN].end()) continue;
+
+        MachineInstr* use = *UI;
+
+        // Attempt to fold it away!
+        int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
+        if (OpIdx == -1) continue;
+        SmallVector<unsigned, 1> Ops;
+        Ops.push_back(OpIdx);
+        if (!TII->canFoldMemoryOperand(use, Ops)) continue;
+
+        MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
+
+        if (!NewMI) continue;
+
+        // Update relevant analyses.
+        LIs->RemoveMachineInstrFromMaps(DefMI);
+        LIs->ReplaceMachineInstrInMaps(use, NewMI);
+        (*LI)->removeValNo(CurrVN);
+
+        DefMI->eraseFromParent();
+        use->eraseFromParent();
+        VNUseCount[CurrVN].erase(use);
+
+        // Remove deleted instructions.  Note that we need to remove them from
+        // the VNInfo->use map as well, just to be safe.
+        for (SmallPtrSet<MachineInstr*, 4>::iterator II =
+               VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
+             II != IE; ++II) {
+          for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+                 VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
+               ++VNI)
+            if (VNI->first != CurrVN)
+              VNI->second.erase(*II);
+          LIs->RemoveMachineInstrFromMaps(*II);
+          (*II)->eraseFromParent();
+        }
+
+        VNUseCount.erase(CurrVN);
+
+        for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+               VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
+          if (VI->second.erase(use))
+            VI->second.insert(NewMI);
+
+        ++NumDeadSpills;
+        changed = true;
+        continue;
+      }
+
+      // If there's more than one non-store instruction, we can't profitably
+      // fold it, so bail.
+      if (NonSpillCount) continue;
+
+      // Otherwise, this is a load-store case, so DCE them.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI = + VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end(); + UI != UE; ++UI) { + LIs->RemoveMachineInstrFromMaps(*UI); + (*UI)->eraseFromParent(); + } + + VNUseCount.erase(CurrVN); + + LIs->RemoveMachineInstrFromMaps(DefMI); + (*LI)->removeValNo(CurrVN); + DefMI->eraseFromParent(); + ++NumDeadSpills; + changed = true; + } + } + + return changed; +} + +bool PreAllocSplitting::createsNewJoin(LiveRange* LR, + MachineBasicBlock* DefMBB, + MachineBasicBlock* BarrierMBB) { + if (DefMBB == BarrierMBB) + return false; + + if (LR->valno->hasPHIKill()) + return false; + + SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); + if (LR->end < MBBEnd) + return false; + + MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>(); + if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB)) + return true; + + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + SmallPtrSet<MachineBasicBlock*, 4> Visited; + typedef std::pair<MachineBasicBlock*, + MachineBasicBlock::succ_iterator> ItPair; + SmallVector<ItPair, 4> Stack; + Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin())); + + while (!Stack.empty()) { + ItPair P = Stack.back(); + Stack.pop_back(); + + MachineBasicBlock* PredMBB = P.first; + MachineBasicBlock::succ_iterator S = P.second; + + if (S == PredMBB->succ_end()) + continue; + else if (Visited.count(*S)) { + Stack.push_back(std::make_pair(PredMBB, ++S)); + continue; + } else + Stack.push_back(std::make_pair(PredMBB, S+1)); + + MachineBasicBlock* MBB = *S; + Visited.insert(MBB); + + if (MBB == BarrierMBB) + return true; + + MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB); + MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB); + MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom(); + while (MDTN) { + if (MDTN == DefMDTN) + return true; + else if (MDTN == BarrierMDTN) + break; + MDTN = MDTN->getIDom(); + } + + MBBEnd = LIs->getMBBEndIdx(MBB); + if (LR->end > MBBEnd) + Stack.push_back(std::make_pair(MBB, MBB->succ_begin())); + } + + return false; +} + + +bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) { + CurrMF = &MF; + TM = &MF.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + MFI = MF.getFrameInfo(); + MRI = &MF.getRegInfo(); + SIs = &getAnalysis<SlotIndexes>(); + LIs = &getAnalysis<LiveIntervals>(); + LSs = &getAnalysis<LiveStacks>(); + VRM = &getAnalysis<VirtRegMap>(); + + bool MadeChange = false; + + // Make sure blocks are numbered in order. 
+ MF.RenumberBlocks(); + + MachineBasicBlock *Entry = MF.begin(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + + SmallPtrSet<LiveInterval*, 8> Split; + + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + BarrierMBB = *DFI; + for (MachineBasicBlock::iterator I = BarrierMBB->begin(), + E = BarrierMBB->end(); I != E; ++I) { + Barrier = &*I; + const TargetRegisterClass **BarrierRCs = + Barrier->getDesc().getRegClassBarriers(); + if (!BarrierRCs) + continue; + BarrierIdx = LIs->getInstructionIndex(Barrier); + MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split); + } + } + + MadeChange |= removeDeadSpills(Split); + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp new file mode 100644 index 0000000..b8831db --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -0,0 +1,295 @@ +//===---------------------- ProcessImplicitDefs.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "processimplicitdefs" + +#include "llvm/CodeGen/ProcessImplicitDefs.h" + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + + +using namespace llvm; + +char ProcessImplicitDefs::ID = 0; +INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions.", false, false); + +void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<LiveVariables>(); + AU.addRequired<LiveVariables>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(TwoAddressInstructionPassID); + AU.addPreservedID(PHIEliminationID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool +ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, + unsigned Reg, unsigned OpIdx, + const TargetInstrInfo *tii_, + SmallSet<unsigned, 8> &ImpDefRegs) { + switch(OpIdx) { + case 1: + return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + ImpDefRegs.count(MI->getOperand(0).getReg())); + case 2: + return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + ImpDefRegs.count(MI->getOperand(0).getReg())); + default: return false; + } +} + +static bool isUndefCopy(MachineInstr *MI, unsigned Reg, + const TargetInstrInfo *tii_, + SmallSet<unsigned, 8> &ImpDefRegs) { + if (MI->isCopy()) { + MachineOperand &MO0 = MI->getOperand(0); + MachineOperand &MO1 = MI->getOperand(1); + if (MO1.getReg() != Reg) + return false; + if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + return true; + return false; + } + return false; +} + +/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure +/// there is one implicit_def for each use. Add isUndef marker to +/// implicit_def defs and their uses. 
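+//
+// As an illustration of the rewrite this pass enables (a sketch over toy
+// types; ToyOp, ToyMI and turnIntoImplicitDef are hypothetical names, not
+// LLVM API): a COPY whose source is only implicitly defined carries no
+// real value, so it can become an IMPLICIT_DEF itself, keeping only the
+// destination operand. Note that CanTurnIntoImplicitDef above keys on the
+// operand index because the source sits at index 1 for COPY and index 2
+// for SUBREG_TO_REG.
+//
+//   #include <vector>
+//   struct ToyOp { bool IsDef; unsigned Reg; };
+//   struct ToyMI { const char *Opcode; std::vector<ToyOp> Ops; };
+//
+//   void turnIntoImplicitDef(ToyMI &MI) {
+//     MI.Opcode = "IMPLICIT_DEF";
+//     while (MI.Ops.size() > 1)
+//       MI.Ops.pop_back();     // drop every source operand
+//   }
+//
+// The real code below must also keep LiveVariables' kill lists and the
+// <undef> flags on remaining uses consistent, which is most of its bulk.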
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
+
+  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+               << "********** Function: "
+               << ((Value*)fn.getFunction())->getName() << '\n');
+
+  bool Changed = false;
+
+  const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo();
+  const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo();
+  MachineRegisterInfo *mri_ = &fn.getRegInfo();
+
+  LiveVariables *lv_ = &getAnalysis<LiveVariables>();
+
+  SmallSet<unsigned, 8> ImpDefRegs;
+  SmallVector<MachineInstr*, 8> ImpDefMIs;
+  SmallVector<MachineInstr*, 4> RUses;
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+  SmallPtrSet<MachineInstr*, 8> ModInsts;
+
+  MachineBasicBlock *Entry = fn.begin();
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MachineBasicBlock *MBB = *DFI;
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ) {
+      MachineInstr *MI = &*I;
+      ++I;
+      if (MI->isImplicitDef()) {
+        if (MI->getOperand(0).getSubReg())
+          continue;
+        unsigned Reg = MI->getOperand(0).getReg();
+        ImpDefRegs.insert(Reg);
+        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
+            ImpDefRegs.insert(*SS);
+        }
+        ImpDefMIs.push_back(MI);
+        continue;
+      }
+
+      // Eliminate %reg1032:sub<def> = COPY undef.
+      if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+        MachineOperand &MO = MI->getOperand(1);
+        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
+          if (MO.isKill()) {
+            LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
+            vi.removeKill(MI);
+          }
+          MI->eraseFromParent();
+          Changed = true;
+          continue;
+        }
+      }
+
+      bool ChangedToImpDef = false;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand& MO = MI->getOperand(i);
+        if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
+          continue;
+        unsigned Reg = MO.getReg();
+        if (!Reg)
+          continue;
+        if (!ImpDefRegs.count(Reg))
+          continue;
+        // Use is a copy, just turn it into an implicit_def.
+        if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) {
+          bool isKill = MO.isKill();
+          MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+          for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+            MI->RemoveOperand(j);
+          if (isKill) {
+            ImpDefRegs.erase(Reg);
+            LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+            vi.removeKill(MI);
+          }
+          ChangedToImpDef = true;
+          Changed = true;
+          break;
+        }
+
+        Changed = true;
+        MO.setIsUndef();
+        // This is a partial register redef of an implicit def.
+        // Make sure the whole register is defined by the instruction.
+        if (MO.isDef()) {
+          MI->addRegisterDefined(Reg);
+          continue;
+        }
+        if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+          // Make sure other uses of Reg in this instruction are also marked
+          // undef.
+          for (unsigned j = i+1; j != e; ++j) {
+            MachineOperand &MOJ = MI->getOperand(j);
+            if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+              MOJ.setIsUndef();
+          }
+          ImpDefRegs.erase(Reg);
+        }
+      }
+
+      if (ChangedToImpDef) {
+        // Backtrack to process this new implicit_def.
+        --I;
+      } else {
+        for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+          MachineOperand& MO = MI->getOperand(i);
+          if (!MO.isReg() || !MO.isDef())
+            continue;
+          ImpDefRegs.erase(MO.getReg());
+        }
+      }
+    }
+
+    // Any outstanding liveout implicit_def's?
+    for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
+      MachineInstr *MI = ImpDefMIs[i];
+      unsigned Reg = MI->getOperand(0).getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+          !ImpDefRegs.count(Reg)) {
+        // Delete all "local" implicit_def's. That includes those which define
+        // physical registers, since they cannot be live out.
+        MI->eraseFromParent();
+        Changed = true;
+        continue;
+      }
+
+      // If there are multiple defs of the same register and at least one
+      // is not an implicit_def, do not insert implicit_def's before the
+      // uses.
+      bool Skip = false;
+      SmallVector<MachineInstr*, 4> DeadImpDefs;
+      for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
+             DE = mri_->def_end(); DI != DE; ++DI) {
+        MachineInstr *DeadImpDef = &*DI;
+        if (!DeadImpDef->isImplicitDef()) {
+          Skip = true;
+          break;
+        }
+        DeadImpDefs.push_back(DeadImpDef);
+      }
+      if (Skip)
+        continue;
+
+      // The only implicit_defs we want to keep are those that are live out
+      // of their block.
+      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+        DeadImpDefs[j]->eraseFromParent();
+      Changed = true;
+
+      // Process each use instruction once.
+      for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+             UE = mri_->use_end(); UI != UE; ++UI) {
+        if (UI.getOperand().isUndef())
+          continue;
+        MachineInstr *RMI = &*UI;
+        if (ModInsts.insert(RMI))
+          RUses.push_back(RMI);
+      }
+
+      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+        MachineInstr *RMI = RUses[i];
+
+        // Turn a copy use into an implicit_def.
+        if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
+          RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+
+          bool isKill = false;
+          SmallVector<unsigned, 4> Ops;
+          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+            MachineOperand &RRMO = RMI->getOperand(j);
+            if (RRMO.isReg() && RRMO.getReg() == Reg) {
+              Ops.push_back(j);
+              if (RRMO.isKill())
+                isKill = true;
+            }
+          }
+          // Leave the other operands alone.
+          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+            unsigned OpIdx = Ops[j];
+            RMI->RemoveOperand(OpIdx-j);
+          }
+
+          // Update LiveVariables varinfo if the instruction is a kill.
+          if (isKill) {
+            LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+            vi.removeKill(RMI);
+          }
+          continue;
+        }
+
+        // Replace Reg with a new vreg that's marked implicit.
+        const TargetRegisterClass* RC = mri_->getRegClass(Reg);
+        unsigned NewVReg = mri_->createVirtualRegister(RC);
+        bool isKill = true;
+        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+          MachineOperand &RRMO = RMI->getOperand(j);
+          if (RRMO.isReg() && RRMO.getReg() == Reg) {
+            RRMO.setReg(NewVReg);
+            RRMO.setIsUndef();
+            if (isKill) {
+              // Only the first operand of NewVReg is marked kill.
+              RRMO.setIsKill();
+              isKill = false;
+            }
+          }
+        }
+      }
+      RUses.clear();
+      ModInsts.clear();
+    }
+    ImpDefRegs.clear();
+    ImpDefMIs.clear();
+  }
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..e2802c1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,841 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pei"
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+INITIALIZE_PASS(PEI, "prologepilog",
+                "Prologue/Epilogue Insertion", false, false);
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+  const Function* F = Fn.getFunction();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+  // function's frame information. Also eliminates call frame pseudo
+  // instructions.
+  calculateCallsInformation(Fn);
+
+  // Allow the target machine to make some adjustments to the function
+  // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+  TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+  // Scan the function for modified callee saved registers and insert spill
+  // code for any that are modified.
+  calculateCalleeSavedRegisters(Fn);
+
+  // Determine placement of CSR spill/restore code:
+  //  - With shrink wrapping, place spills and restores to tightly
+  //    enclose regions in the Machine CFG of the function where
+  //    they are used.
+  //  - Without shrink wrapping (default), place all spills in the
+  //    entry block, all restores in return blocks.
+  placeCSRSpillsAndRestores(Fn);
+
+  // Add the code to save and restore the callee saved registers
+  if (!F->hasFnAttr(Attribute::Naked))
+    insertCSRSpillsAndRestores(Fn);
+
+  // Allow the target machine to make final modifications to the function
+  // before the frame layout is finalized.
+ TRI->processFunctionBeforeFrameFinalized(Fn); + + // Calculate actual frame offsets for all abstract stack objects... + calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. This function is required + // to align the stack frame as necessary for any stack variables or + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack + // and MaxCallFrameSize variables. + if (!F->hasFnAttr(Attribute::Naked)) + insertPrologEpilogCode(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); + + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimiation + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + + delete RS; + clearAllSets(); + return true; +} + +#if 0 +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + if (ShrinkWrapping || ShrinkWrapFunc != "") { + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + } + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} +#endif + +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + unsigned MaxCallFrameSize = 0; + bool AdjustsStack = MFI->adjustsStack(); + + // Get the function call frame set-up and tear-down instruction opcode + int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. + if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + return; + + std::vector<MachineBasicBlock::iterator> FrameSDOps; + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" + " instructions should have a single immediate argument!"); + unsigned Size = I->getOperand(0).getImm(); + if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; + AdjustsStack = true; + FrameSDOps.push_back(I); + } else if (I->isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + if (I->getOperand(1).getImm()) + AdjustsStack = true; + } + + MFI->setAdjustsStack(AdjustsStack); + MFI->setMaxCallFrameSize(MaxCallFrameSize); + + for (std::vector<MachineBasicBlock::iterator>::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. 
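+    // For example, a call site on a typical target looks like (an
+    // illustrative sketch; the actual pseudo opcodes are target-defined,
+    // e.g. ADJCALLSTACKDOWN/ADJCALLSTACKUP on several in-tree targets):
+    //
+    //   ADJCALLSTACKDOWN 16
+    //   ...  call sequence  ...
+    //   ADJCALLSTACKUP 16, 0
+    //
+    // Both pseudos carry the frame size as their first immediate operand,
+    // which is what the scan above used to compute MaxCallFrameSize.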
+ if (RegInfo->canSimplifyCallFramePseudos(Fn)) + RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + } +} + + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. +void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers. + if (CSRegs == 0 || CSRegs[0] == 0) + return; + + // In Naked functions we aren't going to save any registers. + if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + return; + + std::vector<CalleeSavedInfo> CSI; + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + // If the reg is modified, save it! + CSI.push_back(CalleeSavedInfo(Reg)); + } else { + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { // Check alias registers too. + if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { + CSI.push_back(CalleeSavedInfo(Reg)); + break; + } + } + } + } + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const TargetFrameInfo::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector<CalleeSavedInfo>::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); + } + + I->setFrameIdx(FrameIdx); + } + + MFI->setCalleeSavedInfo(CSI); +} + +/// insertCSRSpillsAndRestores - Insert spill and restore code for +/// callee saved registers used in the function, handling shrink wrapping. +/// +void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { + // Get callee saved register information. 
+ MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + MFI->setCalleeSavedInfoValid(true); + + // Early exit if no callee saved registers are modified! + if (CSI.empty()) + return; + + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + MachineBasicBlock::iterator I; + + if (! ShrinkWrapThisFunction) { + // Spill using target interface. + I = EntryBlock->begin(); + if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + EntryBlock->addLiveIn(CSI[i].getReg()); + + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock* MBB = ReturnBlocks[ri]; + I = MBB->end(); --I; + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that preceed it. + if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, + CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } + } + return; + } + + // Insert spills. + std::vector<CalleeSavedInfo> blockCSI; + for (CSRegBlockMap::iterator BI = CSRSave.begin(), + BE = CSRSave.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet save = BI->second; + + if (save.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = save.begin(), + RE = save.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not collect callee saved register info"); + + I = MBB->begin(); + + // When shrink wrapping, use stack slot stores/loads. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(blockCSI[i].getReg()); + + // Insert the spill to the stack frame. 
+ unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, + true, + blockCSI[i].getFrameIdx(), + RC, TRI); + } + } + + for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restore = BI->second; + + if (restore.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = restore.begin(), + RE = restore.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not find callee saved register info"); + + // If MBB is empty and needs restores, insert at the _beginning_. + if (MBB->empty()) { + I = MBB->begin(); + } else { + I = MBB->end(); + --I; + + // Skip over all terminator instructions, which are part of the + // return sequence. + if (! I->getDesc().isTerminator()) { + ++I; + } else { + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + } + } + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that preceed it. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, + blockCSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +static inline void +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + if (StackGrowsDown) { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always nonnegative. 
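+  // For example (illustrative numbers only): a down-growing stack whose
+  // target reports getOffsetOfLocalArea() == -4 yields LocalAreaOffset == 4
+  // below, so the first local object is placed at least 4 bytes beyond the
+  // local area base.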
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 + && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. + // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int64_t FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. For down growing stack + // the offset is negative, so we negate the offset to get the distance. + FixedOff = -MFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + // First assign frame offsets to stack objects that are used to spill + // callee saved registers. + if (StackGrowsDown) { + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + // If the stack grows down, we need to add the size to find the lowest + // address of the object. + Offset += MFI->getObjectSize(i); + + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MFI->setObjectOffset(i, -Offset); // Set the computed offset + } + } else { + int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; + for (int i = MaxCSFI; i >= MinCSFI ; --i) { + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); + } + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Make sure the special register scavenging spill slot is closest to the + // frame pointer if a frame pointer is required. + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { + int SFI = RS->getScavengingFrameIndex(); + if (SFI >= 0) + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? 
-Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + Offset, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (LargeStackObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + } + + // Make sure the special register scavenging spill slot is closest to the + // stack pointer. + if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { + int SFI = RS->getScavengingFrameIndex(); + if (SFI >= 0) + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + if (!RegInfo->targetHandlesStackFrameRounding()) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + } + + // Update frame info to pretend that this is part of the stack... + MFI->setStackSize(Offset - LocalAreaOffset); +} + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. 
+/// +void PEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + // Add prologue to the function... + TRI->emitPrologue(Fn); + + // Add epilogue to restore the callee-save registers in each exiting block + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) + TRI->emitEpilogue(Fn, *I); + } +} + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void PEI::replaceFrameIndices(MachineFunction &Fn) { + if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + + const TargetMachine &TM = Fn.getTarget(); + assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameInfo *TFI = TM.getFrameInfo(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif + int SPAdj = 0; // SP offset due to call frame setup / destroy. + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); + + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; + + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isFI()) { + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } + + MI = 0; + break; + } + + if (DoIncr && I != BB->end()) ++I; + + // Update register states. 
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + } + + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); + } +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + unsigned VirtReg = 0; + unsigned ScratchReg = 0; + int SPAdj = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + ++NumVirtualFrameRegs; + + // Have we already allocated a scratch register for this virtual? + if (Reg != VirtReg) { + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + VirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + ScratchReg = RS->scavengeRegister(RC, I, SPAdj); + ++NumScavengedRegs; + } + // replace this reference to the virtual register with the + // scratch register. + assert (ScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(ScratchReg); + + } + } + if (DoIncr) { + RS->forward(I); + ++I; + } + } + } +} diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h new file mode 100644 index 0000000..d575124 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h @@ -0,0 +1,175 @@ +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. +// +// This pass also implements a shrink wrapping variant of prolog/epilog +// insertion. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PEI_H +#define LLVM_CODEGEN_PEI_H + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class RegScavenger; + class MachineBasicBlock; + + class PEI : public MachineFunctionPass { + public: + static char ID; + PEI() : MachineFunctionPass(ID) {} + + const char *getPassName() const { + return "Prolog/Epilog Insertion & Frame Finalization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn); + + private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Analysis info for spill/restore placement. + // "CSR": "callee saved register". + + // CSRegSet contains indices into the Callee Saved Register Info + // vector built by calculateCalleeSavedRegisters() and accessed + // via MF.getFrameInfo()->getCalleeSavedInfo(). + typedef SparseBitVector<> CSRegSet; + + // CSRegBlockMap maps MachineBasicBlocks to sets of callee + // saved register indices. + typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap; + + // Set and maps for computing CSR spill/restore placement: + // used in function (UsedCSRegs) + // used in a basic block (CSRUsed) + // anticipatable in a basic block (Antic{In,Out}) + // available in a basic block (Avail{In,Out}) + // to be spilled at the entry to a basic block (CSRSave) + // to be restored at the end of a basic block (CSRRestore) + CSRegSet UsedCSRegs; + CSRegBlockMap CSRUsed; + CSRegBlockMap AnticIn, AnticOut; + CSRegBlockMap AvailIn, AvailOut; + CSRegBlockMap CSRSave; + CSRegBlockMap CSRRestore; + + // Entry and return blocks of the current function. + MachineBasicBlock* EntryBlock; + SmallVector<MachineBasicBlock*, 4> ReturnBlocks; + + // Map of MBBs to top level MachineLoops. + DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops; + + // Flag to control shrink wrapping per-function: + // may choose to skip shrink wrapping for certain + // functions. + bool ShrinkWrapThisFunction; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the curren function. + bool FrameIndexVirtualScavenging; + +#ifndef NDEBUG + // Machine function handle. + MachineFunction* MF; + + // Flag indicating that the current function + // has at least one "short" path in the machine + // CFG from the entry block to an exit block. 
+    bool HasFastExitPath;
+#endif
+
+    bool calculateSets(MachineFunction &Fn);
+    bool calcAnticInOut(MachineBasicBlock* MBB);
+    bool calcAvailInOut(MachineBasicBlock* MBB);
+    void calculateAnticAvail(MachineFunction &Fn);
+    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4>& blks);
+    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+    bool calcSpillPlacements(MachineBasicBlock* MBB,
+                             SmallVector<MachineBasicBlock*, 4> &blks,
+                             CSRegBlockMap &prevSpills);
+    bool calcRestorePlacements(MachineBasicBlock* MBB,
+                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               CSRegBlockMap &prevRestores);
+    void placeSpillsAndRestores(MachineFunction &Fn);
+    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateCallsInformation(MachineFunction &Fn);
+    void calculateCalleeSavedRegisters(MachineFunction &Fn);
+    void insertCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    void replaceFrameIndices(MachineFunction &Fn);
+    void scavengeFrameVirtualRegs(MachineFunction &Fn);
+    void insertPrologEpilogCode(MachineFunction &Fn);
+
+    // Initialize DFA sets, called before iterations.
+    void clearAnticAvailSets();
+    // Clear all sets constructed by shrink wrapping.
+    void clearAllSets();
+
+    // Initialize all shrink wrapping data.
+    void initShrinkWrappingInfo();
+
+    // Conveniences for dealing with machine loops.
+    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+    // Propagate CSRs used in MBB to all MBBs of loop LP.
+    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+    // Convenience for recognizing return blocks.
+    bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+    // Debugging methods.
+
+    // Mark this function as having fast exit paths.
+    void findFastExitPath();
+
+    // Verify placement of spills/restores.
+    void verifySpillRestorePlacement();
+
+    std::string getBasicBlockName(const MachineBasicBlock* MBB);
+    std::string stringifyCSRegSet(const CSRegSet& s);
+    void dumpSet(const CSRegSet& s);
+    void dumpUsed(MachineBasicBlock* MBB);
+    void dumpAllUsed();
+    void dumpSets(MachineBasicBlock* MBB);
+    void dumpSets1(MachineBasicBlock* MBB);
+    void dumpAllSets();
+    void dumpSRSets();
+#endif
+
+  };
+} // End llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 0000000..5e86e5a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,134 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include <map>
+using namespace llvm;
+
+namespace {
+struct PSVGlobalsTy {
+  // PseudoSourceValues are immutable so don't need locking.
+ const PseudoSourceValue PSVs[4]; + sys::Mutex Lock; // Guards FSValues, but not the values inside it. + std::map<int, const PseudoSourceValue *> FSValues; + + PSVGlobalsTy() : PSVs() {} + ~PSVGlobalsTy() { + for (std::map<int, const PseudoSourceValue *>::iterator + I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { + delete I->second; + } + } +}; + +static ManagedStatic<PSVGlobalsTy> PSVGlobals; + +} // anonymous namespace + +const PseudoSourceValue *PseudoSourceValue::getStack() +{ return &PSVGlobals->PSVs[0]; } +const PseudoSourceValue *PseudoSourceValue::getGOT() +{ return &PSVGlobals->PSVs[1]; } +const PseudoSourceValue *PseudoSourceValue::getJumpTable() +{ return &PSVGlobals->PSVs[2]; } +const PseudoSourceValue *PseudoSourceValue::getConstantPool() +{ return &PSVGlobals->PSVs[3]; } + +static const char *const PSVNames[] = { + "Stack", + "GOT", + "JumpTable", + "ConstantPool" +}; + +// FIXME: THIS IS A HACK!!!! +// Eventually these should be uniqued on LLVMContext rather than in a managed +// static. For now, we can safely use the global context for the time being to +// squeak by. +PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : + Value(Type::getInt8PtrTy(getGlobalContext()), + Subclass) {} + +void PseudoSourceValue::printCustom(raw_ostream &O) const { + O << PSVNames[this - PSVGlobals->PSVs]; +} + +const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { + PSVGlobalsTy &PG = *PSVGlobals; + sys::ScopedLock locked(PG.Lock); + const PseudoSourceValue *&V = PG.FSValues[FI]; + if (!V) + V = new FixedStackPseudoSourceValue(FI); + return V; +} + +bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { + if (this == getStack()) + return false; + if (this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return true; + llvm_unreachable("Unknown PseudoSourceValue!"); + return false; +} + +bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + if (this == getStack() || + this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return false; + llvm_unreachable("Unknown PseudoSourceValue!"); + return true; +} + +bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { + if (this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return false; + return true; +} + +bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{ + return MFI && MFI->isImmutableObjectIndex(FI); +} + +bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + // Negative frame indices are used for special things that don't + // appear in LLVM IR. Non-negative indices may be used for things + // like static allocas. + if (!MFI) + return FI >= 0; + // Spill slots should not alias others. + return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI); +} + +bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { + if (!MFI) + return true; + // Spill slots will not alias any LLVM IR value. 
+  return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+  OS << "FixedStack" << FI;
+}
diff --git a/contrib/llvm/lib/CodeGen/README.txt b/contrib/llvm/lib/CodeGen/README.txt
new file mode 100644
index 0000000..b655dda4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/README.txt
@@ -0,0 +1,199 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+        mul lr, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        ldr r4, [sp, #+52]
+        mla r4, r3, lr, r4
+
+can be:
+
+        mul lr, r4, lr
+        mov r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+        mul r4, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+It also increases the likelihood that the store may become dead.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+        ...
+        %reg1037 = ADDri %reg1039, 1
+        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+    Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+    Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+        %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. the register scavenger) to assign a free register to
+allow reuse:
+        ldr r3, [sp, #+4]
+        add r3, r3, #3
+        ldr r2, [sp, #+8]
+        add r2, r2, #2
+        ldr r1, [sp, #+4]  <==
+        add r1, r1, #1
+        ldr r0, [sp, #+4]
+        add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; then R1, R2, and R3 can be spilled.
+We need to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4  @ re-materialized
+load [i + R1]
+...
+R2 = X + 7  @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+        %array = load { i32, [0 x %obj] }** %array_addr
+        %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+        %old = load %obj** %nth_el
+        %z = div i64 %x, %y
+        store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so this may be hard to get right.
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
+
+//===---------------------------------------------------------------------===//
+
+The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be
+revisited. The check is there to work around a misuse of directives in inline
+assembly.
+
+//===---------------------------------------------------------------------===//
+
+It would be good to detect collector/target compatibility instead of silently
+doing the wrong thing.
+
+//===---------------------------------------------------------------------===//
+
+It would be really nice to be able to write patterns in .td files for copies,
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
+synthesize the various copy insertion/inspection methods in TargetInstrInfo.
+
+//===---------------------------------------------------------------------===//
+
+Stack coloring improvements:
+
+1. Do proper LiveStackAnalysis on all stack objects including those which are
+   not spill slots.
+2. Reorder objects to fill in gaps between objects.
+   e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
+
+//===---------------------------------------------------------------------===//
+
+The scheduler should be able to sort nearby instructions by their address. For
+example, in an expanded memset sequence it's not uncommon to see code like this:
+
+        movl $0, 4(%rdi)
+        movl $0, 8(%rdi)
+        movl $0, 12(%rdi)
+        movl $0, 0(%rdi)
+
+Each of the stores is independent, and the scheduler is currently making an
+arbitrary decision about the order.
+
+//===---------------------------------------------------------------------===//
+
+Another opportunity in this code is that the $0 could be moved to a register:
+
+        movl $0, 4(%rdi)
+        movl $0, 8(%rdi)
+        movl $0, 12(%rdi)
+        movl $0, 0(%rdi)
+
+This would save substantial code size, especially for longer sequences like
+this. It would be easy to have a rule telling isel to avoid matching MOV32mi
+if the immediate has more than some fixed number of uses. It's more involved
+to teach the register allocator how to do late folding to recover from
+excessive register pressure.
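+
+For illustration, the hoisted form of the sequence above would look like the
+following (a sketch only; the register choice here is arbitrary):
+
+        movl $0, %eax
+        movl %eax, 4(%rdi)
+        movl %eax, 8(%rdi)
+        movl %eax, 12(%rdi)
+        movl %eax, 0(%rdi)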
+ diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp new file mode 100644 index 0000000..fc150d5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -0,0 +1,1048 @@ +//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), + isBulkSpilling(false) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // Basic block currently being allocated. + MachineBasicBlock *MBB; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Everything we know about a live virtual register. + struct LiveReg { + MachineInstr *LastUse; // Last instr to use reg. + unsigned PhysReg; // Currently held here. + unsigned short LastOpNum; // OpNum on LastUse. + bool Dirty; // Register needs spill. + + LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), + Dirty(false) {} + }; + + typedef DenseMap<unsigned, LiveReg> LiveRegMap; + typedef LiveRegMap::value_type LiveRegEntry; + + // LiveVirtRegs - This map contains entries for each virtual register + // that is currently available in a physical register. + LiveRegMap LiveVirtRegs; + + DenseMap<unsigned, MachineInstr *> LiveDbgValueMap; + + // RegState - Track the state of a physical register. + enum RegState { + // A disabled register is not available for allocation, but an alias may + // be in use. A register can only be moved out of the disabled state if + // all aliases are disabled. + regDisabled, + + // A free register is not currently in use and can be allocated + // immediately without checking aliases. 
+      regFree,
+
+      // A reserved register has been assigned explicitly (e.g., setting up a
+      // call parameter), and it remains reserved until it is used.
+      regReserved
+
+      // A register state may also be a virtual register number, indicating
+      // that the physical register is currently allocated to a virtual
+      // register. In that case, LiveVirtRegs contains the inverse mapping.
+    };
+
+    // PhysRegState - One of the RegState enums, or a virtreg.
+    std::vector<unsigned> PhysRegState;
+
+    // UsedInInstr - BitVector of physregs that are used in the current
+    // instruction, and so cannot be allocated.
+    BitVector UsedInInstr;
+
+    // Allocatable - vector of allocatable physical registers.
+    BitVector Allocatable;
+
+    // SkippedInstrs - Descriptors of instructions whose clobber list was
+    // ignored because all registers were spilled. It is still necessary to
+    // mark all the clobbered registers as used by the function.
+    SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+
+    // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+    // completely after spilling all live registers. LiveRegMap entries should
+    // not be erased.
+    bool isBulkSpilling;
+
+    enum {
+      spillClean = 1,
+      spillDirty = 100,
+      spillImpossible = ~0u
+    };
+  public:
+    virtual const char *getPassName() const {
+      return "Fast Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    bool runOnMachineFunction(MachineFunction &Fn);
+    void AllocateBasicBlock();
+    void handleThroughOperands(MachineInstr *MI,
+                               SmallVectorImpl<unsigned> &VirtDead);
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+    bool isLastUseOfLocalReg(MachineOperand&);
+
+    void addKillFlag(const LiveReg&);
+    void killVirtReg(LiveRegMap::iterator);
+    void killVirtReg(unsigned VirtReg);
+    void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+    void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+    void usePhysReg(MachineOperand&);
+    void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+    unsigned calcSpillCost(unsigned PhysReg) const;
+    void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
+    void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+    LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+                                       unsigned VirtReg, unsigned Hint);
+    LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+                                       unsigned VirtReg, unsigned Hint);
+    void spillAll(MachineInstr *MI);
+    bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+  };
+  char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location Reg would belong to...
+  int SS = StackSlotForVirtReg[VirtReg];
+  if (SS != -1)
+    return SS;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+                                                            RC->getAlignment());
+
+  // Assign the slot.
+  StackSlotForVirtReg[VirtReg] = FrameIdx;
+  return FrameIdx;
+}
+
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
+/// +bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { + // Check for non-debug uses or defs following MO. + // This is the most likely way to fail - fast path it. + MachineOperand *Next = &MO; + while ((Next = Next->getNextOperandForReg())) + if (!Next->isDebug()) + return false; + + // If the register has ever been spilled or reloaded, we conservatively assume + // it is a global register used in multiple blocks. + if (StackSlotForVirtReg[MO.getReg()] != -1) + return false; + + // Check that the use/def chain has exactly one operand - MO. + return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; +} + +/// addKillFlag - Set kill flags on last use of a virtual register. +void RAFast::addKillFlag(const LiveReg &LR) { + if (!LR.LastUse) return; + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { + if (MO.getReg() == LR.PhysReg) + MO.setIsKill(); + else + LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + } +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator LRI) { + addKillFlag(LRI->second); + const LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + PhysRegState[LR.PhysReg] = regFree; + // Erase from LiveVirtRegs unless we're spilling in bulk. + if (!isBulkSpilling) + LiveVirtRegs.erase(LRI); +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "killVirtReg needs a virtual register"); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + if (LRI != LiveVirtRegs.end()) + killVirtReg(LRI); +} + +/// spillVirtReg - This method spills the value specified by VirtReg into the +/// corresponding stack slot if needed. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Spilling a physical register is illegal!"); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MI, LRI); +} + +/// spillVirtReg - Do the actual work of spilling. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, + LiveRegMap::iterator LRI) { + LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + + if (LR.Dirty) { + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. + bool SpillKill = LR.LastUse != MI; + LR.Dirty = false; + DEBUG(dbgs() << "Spilling %reg" << LRI->first + << " in " << TRI->getName(LR.PhysReg)); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); + int FI = getStackSpaceFor(LRI->first, RC); + DEBUG(dbgs() << " to stack slot #" << FI << "\n"); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); + ++NumStores; // Update statistics + + // If this register is used by DBG_VALUE then insert new DBG_VALUE to + // identify spilled location as the place to find corresponding variable's + // value. + if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) { + const MDNode *MDPtr = + DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); + int64_t Offset = 0; + if (DBG->getOperand(1).isImm()) + Offset = DBG->getOperand(1).getImm(); + DebugLoc DL; + if (MI == MBB->end()) { + // If MI is at basic block end then use last instruction's location. 
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ LiveDbgValueMap[LRI->first] = NewDV;
+ }
+ }
+ if (SpillKill)
+ LR.LastUse = 0; // Don't kill register again
+ }
+ killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineInstr *MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value
+ // we wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
+
+ // Maybe a superregister is reserved?
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+ RegState NewState) {
+ UsedInInstr.set(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
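+ // (Expository note: a register is regDisabled when one of its aliases owns
+ // its state, e.g. on x86 %AX is disabled while %EAX holds a value. Taking
+ // such a register means spilling whatever any alias holds and disabling
+ // every alias, which is what the loop below does.)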
+ PhysRegState[PhysReg] = NewState;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ UsedInInstr.set(Alias);
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
+
+
+// calcSpillCost - Return the cost of spilling or clearing out PhysReg and
+// its aliases so it is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (UsedInInstr.test(PhysReg))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ return spillImpossible;
+ default:
+ return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ }
+
+ // This is a disabled register, add up the cost of aliases.
+ unsigned Cost = 0;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ if (UsedInInstr.test(Alias))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default:
+ Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ break;
+ }
+ }
+ return Cost;
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
+ << TRI->getName(PhysReg) << "\n");
+ PhysRegState[PhysReg] = LRE.first;
+ assert(!LRE.second.PhysReg && "Already assigned a physreg");
+ LRE.second.PhysReg = PhysReg;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
+ const unsigned VirtReg = LRE.first;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !Allocatable.test(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
+ switch(calcSpillCost(Hint)) {
+ default:
+ definePhysReg(MI, Hint, regFree);
+ // Fall through.
+ case 0:
+ return assignVirtToPhysReg(LRE, Hint);
+ case spillImpossible:
+ break;
+ }
+ }
+
+ TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF);
+
+ // First try to find a completely free register.
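+ // (Expository note: if no register is entirely free, the second loop below
+ // falls back to calcSpillCost(), which under the enum above charges
+ // spillClean = 1 to evict a clean virtreg and spillDirty = 100 when a store
+ // is needed first, so clean registers are strongly preferred.)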
+ for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + unsigned PhysReg = *I; + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) && + Allocatable.test(PhysReg)) + return assignVirtToPhysReg(LRE, PhysReg); + } + + DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName() + << "\n"); + + unsigned BestReg = 0, BestCost = spillImpossible; + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + if (!Allocatable.test(*I)) + continue; + unsigned Cost = calcSpillCost(*I); + // Cost is 0 when all aliases are already disabled. + if (Cost == 0) + return assignVirtToPhysReg(LRE, *I); + if (Cost < BestCost) + BestReg = *I, BestCost = Cost; + } + + if (BestReg) { + definePhysReg(MI, BestReg, regFree); + return assignVirtToPhysReg(LRE, BestReg); + } + + // Nothing we can do. + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for " + << "invalid constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); +} + +/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. +RAFast::LiveRegMap::iterator +RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = LRI->second; + if (New) { + // If there is no hint, peek at the only use of this register. + if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + MRI->hasOneNonDBGUse(VirtReg)) { + const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); + // It's a copy, use the destination register as a hint. + if (UseMI.isCopyLike()) + Hint = UseMI.getOperand(0).getReg(); + } + allocVirtReg(MI, *LRI, Hint); + } else if (LR.LastUse) { + // Redefining a live register - kill at the last use, unless it is this + // instruction defining VirtReg multiple times. + if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) + addKillFlag(LR); + } + assert(LR.PhysReg && "Register not assigned"); + LR.LastUse = MI; + LR.LastOpNum = OpNum; + LR.Dirty = true; + UsedInInstr.set(LR.PhysReg); + return LRI; +} + +/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. 
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ allocVirtReg(MI, *LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
+ << TRI->getName(LR.PhysReg) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LR.Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || MO.isDead();
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
+ }
+ return MO.isDead();
+}
+
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << " %reg" << Reg);
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
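+ // (Illustrative, hypothetical example: inline asm that ties virtreg
+ // %reg1024 to an output while also clobbering %EAX; if %reg1024 currently
+ // lives in %EAX, it must be spilled and reallocated so the value survives
+ // the clobber.)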
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ if (ThroughRegs.count(PhysRegState[Reg]))
+ definePhysReg(MI, Reg, regFree);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ UsedInInstr.set(*AS);
+ if (ThroughRegs.count(PhysRegState[*AS]))
+ definePhysReg(MI, *AS, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->second.PhysReg);
+ } else if (MO.isEarlyClobber()) {
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ UsedInInstr.set(PartialDefs[i]);
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (Allocatable.test(*I))
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
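+ // (Summary of the loop below: each instruction is handled with up to three
+ // scans - mark physreg uses and early clobbers, allocate virtreg uses, then
+ // allocate defs - with all virtregs spilled before calls.)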
+ while (MII != MBB->end()) { + MachineInstr *MI = MII++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG({ + dbgs() << "\n>> " << *MI << "Regs:"; + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << TRI->getName(Reg); + switch(PhysRegState[Reg]) { + case regFree: + break; + case regReserved: + dbgs() << "*"; + break; + default: + dbgs() << "=%reg" << PhysRegState[Reg]; + if (LiveVirtRegs[PhysRegState[Reg]].Dirty) + dbgs() << "*"; + assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && + "Bad inverse map"); + break; + } + } + dbgs() << '\n'; + // Check that LiveVirtRegs is the inverse. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + assert(TargetRegisterInfo::isVirtualRegister(i->first) && + "Bad map key"); + assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) && + "Bad map value"); + assert(PhysRegState[i->second.PhysReg] == i->first && + "Bad inverse map"); + } + }); + + // Debug values are not allowed to change codegen in any way. + if (MI->isDebugValue()) { + bool ScanDbgValue = true; + while (ScanDbgValue) { + ScanDbgValue = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + LiveDbgValueMap[Reg] = MI; + LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->second.PhysReg); + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) + // We can't allocate a physreg for a DebugValue, sorry! + MO.setReg(0); + else { + // Modify DBG_VALUE now that the value is in a spill slot. + int64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << + "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; + } else + // We can't allocate a physreg for a DebugValue; sorry! + MO.setReg(0); + } + } + } + } + // Next instruction. + continue; + } + + // If this is a copy, we may be able to coalesce. + unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } + + // Track registers used by instruction. + UsedInInstr.reset(); + + // First scan. + // Mark physreg uses and early clobbers as used. 
+ // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } + continue; + } + if (!Allocatable.test(Reg)) continue; + if (MO.isUse()) { + usePhysReg(MO); + } else if (MO.isEarlyClobber()) { + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; + } + + // Second scan. + // Allocate virtreg uses. + for (unsigned i = 0; i != VirtOpEnd; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isUse()) { + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + unsigned PhysReg = LRI->second.PhysReg; + CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; + if (setPhysReg(MI, i, PhysReg)) + killVirtReg(LRI); + } + } + + MRI->addPhysRegsUsed(UsedInInstr); + + // Track registers defined by instruction - early clobbers and tied uses at + // this point. + UsedInInstr.reset(); + if (hasEarlyClobbers) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // Look for physreg defs and tied uses. + if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } + } + + unsigned DefOpEnd = MI->getNumOperands(); + if (TID.isCall()) { + // Spill all virtregs before a call. This serves two purposes: 1. If an + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots, and 2. we don't have to wade through + // all the <imp-def> operands on the call instruction. 
+ DefOpEnd = VirtOpEnd;
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&TID);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!Allocatable.test(Reg)) continue;
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ continue;
+ }
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDst = 0; // cancel coalescing.
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+ // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
+ }
+
+ // Spill all physical registers holding virtual registers now.
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)Fn.getFunction())->getName() << '\n');
+ MF = &Fn;
+ MRI = &MF->getRegInfo();
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ UsedInInstr.resize(TRI->getNumRegs());
+ Allocatable = TRI->getAllocatableSet(*MF);
+
+ // Initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers.
+ unsigned LastVirtReg = MRI->getLastVirtReg();
+ StackSlotForVirtReg.grow(LastVirtReg);
+
+ // Loop over all of the basic blocks, eliminating virtual register references.
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Make sure the set of used physregs is closed under subreg operations.
+ MRI->closePhysRegsUsed(*TRI);
+
+ // Add the clobber lists for all the instructions we skipped earlier.
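+ // (Expository note: for a call instruction this records its implicit-def
+ // list, i.e. the caller-saved registers, as used, e.g. %EAX, %ECX and %EDX
+ // on x86-32.)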
+ for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const unsigned *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ SkippedInstrs.clear();
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 0000000..5c62354
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,1533 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+
+using namespace llvm;
+
+STATISTIC(NumIters, "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+ cl::desc("Pre-register allocation live interval splitting"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+ cl::desc("Attempt trivial coalescing of interval ends"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ // When we allocate a register, add it to a fixed-size queue of
+ // registers to skip in subsequent allocations. This trades a small
+ // amount of register pressure and increased spills for flexibility in
+ // the post-pass scheduler.
+ //
+ // Note that the number of registers used for reloading spills
+ // will be one greater than the value of this option.
+ //
+ // One big limitation of this is that it doesn't differentiate between
+ // different register classes. So on x86-64, if there is xmm register
+ // pressure, it can cause fewer GPRs to be held in the queue.
+ static cl::opt<unsigned>
+ NumRecentlyUsedRegs("linearscan-skip-count",
+ cl::desc("Number of registers for linearscan to remember "
+ "to skip."),
+ cl::init(0),
+ cl::Hidden);
+
+ struct RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(ID) {
+ // Initialize the queue to record recently-used registers.
+ if (NumRecentlyUsedRegs > 0)
+ RecentRegs.resize(NumRecentlyUsedRegs, 0);
+ RecentNext = RecentRegs.begin();
+ }
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ // NextReloadMap - For each register in the map, it maps to another
+ // register which is defined by a reload from the same stack slot and
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ BitVector reservedRegs_;
+ LiveIntervals* li_;
+ LiveStacks* ls_;
+ MachineLoopInfo *loopInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole at the current point.
+ IntervalPtrs inactive_;
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
+ IntervalHeap unhandled_;
+
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ // The queue of recently-used registers.
+ SmallVector<unsigned, 4> RecentRegs;
+ SmallVector<unsigned, 4>::iterator RecentNext;
+
+ // Record that we just picked this register.
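+ // The queue is used as a fixed-size ring buffer: RecentNext wraps back to
+ // the beginning, so with N slots a register is only skipped for the next N
+ // allocations. (Expository note.)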
+ void recordRecentlyUsed(unsigned reg) { + assert(reg != 0 && "Recently used register is NOREG!"); + if (!RecentRegs.empty()) { + *RecentNext++ = reg; + if (RecentNext == RecentRegs.end()) + RecentNext = RecentRegs.begin(); + } + } + + public: + virtual const char* getPassName() const { + return "Linear Scan Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + // Make sure PassManager knows which analyses to make available + // to coalescing and which analyses coalescing invalidates. + AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); + if (PreSplitIntervals) + AU.addRequiredID(PreAllocSplittingID); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - register allocate the whole function + bool runOnMachineFunction(MachineFunction&); + + // Determine if we skip this register due to its being recently used. + bool isRecentlyUsed(unsigned reg) const { + return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != + RecentRegs.end(); + } + + private: + /// linearScan - the linear scan algorithm + void linearScan(); + + /// initIntervalSets - initialize the interval sets. + /// + void initIntervalSets(); + + /// processActiveIntervals - expire old intervals and move non-overlapping + /// ones to the inactive list. + void processActiveIntervals(SlotIndex CurPoint); + + /// processInactiveIntervals - expire old intervals and move overlapping + /// ones to the active list. + void processInactiveIntervals(SlotIndex CurPoint); + + /// hasNextReloadInterval - Return the next liveinterval that's being + /// defined by a reload from the same SS as the specified one. + LiveInterval *hasNextReloadInterval(LiveInterval *cur); + + /// DowngradeRegister - Downgrade a register for allocation. + void DowngradeRegister(LiveInterval *li, unsigned Reg); + + /// UpgradeRegister - Upgrade a register for allocation. + void UpgradeRegister(unsigned Reg); + + /// assignRegOrStackSlotAtInterval - assign a register if one + /// is available, or spill. + void assignRegOrStackSlotAtInterval(LiveInterval* cur); + + void updateSpillWeights(std::vector<float> &Weights, + unsigned reg, float weight, + const TargetRegisterClass *RC); + + /// findIntervalsToSpill - Determine the intervals to spill for the + /// specified interval. It's passed the physical registers whose spill + /// weight is the lowest among all the registers whose live intervals + /// conflict with the interval. + void findIntervalsToSpill(LiveInterval *cur, + std::vector<std::pair<unsigned,float> > &Candidates, + unsigned NumCands, + SmallVector<LiveInterval*, 8> &SpillIntervals); + + /// attemptTrivialCoalescing - If a simple interval is defined by a copy, + /// try to allocate the definition to the same register as the source, + /// if the register is not defined during the life time of the interval. + /// This eliminates a copy, and is used to coalesce copies which were not + /// coalesced away before allocation either due to dest and src being in + /// different register classes or because the coalescer was overly + /// conservative. 
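+ /// (Illustrative, hypothetical example: if %reg1026 is defined only by
+ /// '%reg1026 = COPY %EAX' and %EAX is not redefined over %reg1026's
+ /// interval, assigning %reg1026 to %EAX makes the copy a no-op.)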
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); + + /// + /// Register usage / availability tracking helpers. + /// + + void initRegUses() { + regUse_.resize(tri_->getNumRegs(), 0); + regUseBackUp_.resize(tri_->getNumRegs(), 0); + } + + void finalizeRegUses() { +#ifndef NDEBUG + // Verify all the registers are "freed". + bool Error = false; + for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { + if (regUse_[i] != 0) { + dbgs() << tri_->getName(i) << " is still in use!\n"; + Error = true; + } + } + if (Error) + llvm_unreachable(0); +#endif + regUse_.clear(); + regUseBackUp_.clear(); + } + + void addRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + ++regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) + ++regUse_[*as]; + } + + void delRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + assert(regUse_[physReg] != 0); + --regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { + assert(regUse_[*as] != 0); + --regUse_[*as]; + } + } + + bool isRegAvail(unsigned physReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + return regUse_[physReg] == 0; + } + + void backUpRegUses() { + regUseBackUp_ = regUse_; + } + + void restoreRegUses() { + regUse_ = regUseBackUp_; + } + + /// + /// Register handling helpers. + /// + + /// getFreePhysReg - return a free physical register for this virtual + /// register interval if we have one, otherwise return 0. + unsigned getFreePhysReg(LiveInterval* cur); + unsigned getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, + unsigned MaxInactiveCount, + SmallVector<unsigned, 256> &inactiveCounts, + bool SkipDGRegs); + + /// getFirstNonReservedPhysReg - return the first non-reserved physical + /// register in the register class. + unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { + TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); + while (i != aoe && reservedRegs_.test(*i)) + ++i; + assert(i != aoe && "All registers reserved?!"); + return *i; + } + + void ComputeRelatedRegClasses(); + + template <typename ItTy> + void printIntervals(const char* const str, ItTy i, ItTy e) const { + DEBUG({ + if (str) + dbgs() << str << " intervals:\n"; + + for (; i != e; ++i) { + dbgs() << "\t" << *i->first << " -> "; + + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) + reg = vrm_->getPhys(reg); + + dbgs() << tri_->getName(reg) << '\n'; + } + }); + } + }; + char RALinScan::ID = 0; +} + +INITIALIZE_PASS(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false); + +void RALinScan::ComputeRelatedRegClasses() { + // First pass, add all reg classes to the union, and determine at least one + // reg class that each register is in. + bool HasAliases = false; + for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), + E = tri_->regclass_end(); RCI != E; ++RCI) { + RelatedRegClasses.insert(*RCI); + for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); + I != E; ++I) { + HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; + + const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; + if (PRC) { + // Already processed this register. 
Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
+ RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
+/// to allocate the definition to the same register as the source register if
+/// the register is not defined during the lifetime of the interval. If the
+/// interval is killed by a copy, try to use the destination register. This
+/// eliminates a copy. This is used to coalesce copies which were not
+/// coalesced away before allocation either due to dest and src being in
+/// different register classes or because the coalescer was overly
+/// conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+ if ((Preference && Preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ // We cannot handle complicated live ranges. Simple linear stuff only.
+ if (cur.ranges.size() != 1)
+ return Reg;
+
+ const LiveRange &range = cur.ranges.front();
+
+ VNInfo *vni = range.valno;
+ if (vni->isUnused())
+ return Reg;
+
+ unsigned CandReg;
+ {
+ MachineInstr *CopyMI;
+ if (vni->def != SlotIndex() && vni->isDefAccurate() &&
+ (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+ // Defined by a copy, try to extend SrcReg forward
+ CandReg = CopyMI->getOperand(1).getReg();
+ else if (TrivCoalesceEnds &&
+ (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
+ // Only used by a copy, try to extend DstReg backwards
+ CandReg = CopyMI->getOperand(0).getReg();
+ else
+ return Reg;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+ if (!vrm_->isAssignedReg(CandReg))
+ return Reg;
+ CandReg = vrm_->getPhys(CandReg);
+ }
+ if (Reg == CandReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(CandReg))
+ return Reg;
+
+ if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+ return Reg;
+
+ // Try to coalesce.
+ DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+ << '\n');
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, CandReg);
+
+ ++NumCoalesce;
+ return CandReg;
+}
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
+ li_ = &getAnalysis<LiveIntervals>();
+ ls_ = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ // We don't run the coalescer here because we have no reason to
+ // interact with it. If the coalescer requires interaction, it
+ // won't do anything. If it doesn't require interaction, we assume
+ // it was run as a separate pass.
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ // Also resize register usage trackers.
+ initRegUses();
+
+ vrm_ = &getAnalysis<VirtRegMap>();
+ if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+ spiller_.reset(createSpiller(*this, *mf_, *vrm_));
+
+ initIntervalSets();
+
+ linearScan();
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+ assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+ finalizeRegUses();
+
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
+ spiller_.reset(0);
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ handled_.reserve(li_->getNumIntervals());
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+ if (!i->second->empty()) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ }
+ } else {
+ if (i->second->empty()) {
+ assignRegOrStackSlotAtInterval(i->second);
+ }
+ else
+ unhandled_.push(i->second);
+ }
+ }
+}
+
+void RALinScan::linearScan() {
+ // linear scan algorithm
+ DEBUG({
+ dbgs() << "********** LINEAR SCAN **********\n"
+ << "********** Function: "
+ << mf_->getFunction()->getName() << '\n';
+ printIntervals("fixed", fixed_.begin(), fixed_.end());
+ });
+
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++NumIters;
+ DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
+
+ assert(!cur->empty() && "Empty interval in unhandled set.");
+
+ processActiveIntervals(cur->beginIndex());
+ processInactiveIntervals(cur->beginIndex());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+
+ // Allocating a virtual register: try to find a free
+ // physical register or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG({
+ printIntervals("active", active_.begin(), active_.end());
+ printIntervals("inactive", inactive_.begin(), inactive_.end());
+ });
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG({
+ for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ dbgs() << "\tinterval " << *i->first << " expired\n";
+ });
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+ // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Adding a virtual register to livein set?");
+ LiveInMBBs[i]->addLiveIn(Reg);
+ }
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DEBUG(dbgs() << *vrm_);
+
+ // Look for physical registers that end up not being allocated even though
+ // the register allocator had to spill other registers in its register class.
+ if (ls_->getNumIntervals() == 0)
+ return;
+ if (!vrm_->FindUnusedRegisters(li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing active intervals:\n");
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to inactive list.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // Move re-activated intervals to the active list.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+ // If the alias is a super-register, and the super-register is in the
+ // register class we are trying to allocate, then add the weight to all
+ // sub-registers of the super-register even if they are not aliases.
+ // e.g. allocating for GR32, bh is not used, updating bl spill weight.
+ // bl should get the same spill weight otherwise it will be chosen
+ // as a spill candidate since spilling bh doesn't make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+ SlotIndex Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// addStackInterval - Create a LiveInterval for stack if the specified live
+/// interval has been spilled.
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
+ LiveIntervals *li_,
+ MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
+ int SS = vrm_.getStackSlot(cur->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
+
+ VNInfo *VNI;
+ if (SI.hasAtLeastOneValue())
+ VNI = SI.getValNumInfo(0);
+ else
+ VNI = SI.getNextValue(SlotIndex(), 0, false,
+ ls_->getVNInfoAllocator());
+
+ LiveInterval &RI = li_->getInterval(cur->reg);
+ // FIXME: This may be overly conservative.
+ SI.MergeRangesInAsValue(RI, VNI);
+}
+
+/// getConflictWeight - Return the number of conflicts between cur
+/// live interval and defs and uses of Reg weighted by loop depths.
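+/// (For example, a conflicting use at loop depth 2 contributes
+/// pow(10.0f, 2) = 100 to the returned weight; see the loop below.)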
+static +float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, + MachineRegisterInfo *mri_, + MachineLoopInfo *loopInfo) { + float Conflicts = 0; + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), + E = mri_->reg_end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (cur->liveAt(li_->getInstructionIndex(MI))) { + unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); + Conflicts += std::pow(10.0f, (float)loopDepth); + } + } + return Conflicts; +} + +/// findIntervalsToSpill - Determine the intervals to spill for the +/// specified interval. It's passed the physical registers whose spill +/// weight is the lowest among all the registers whose live intervals +/// conflict with the interval. +void RALinScan::findIntervalsToSpill(LiveInterval *cur, + std::vector<std::pair<unsigned,float> > &Candidates, + unsigned NumCands, + SmallVector<LiveInterval*, 8> &SpillIntervals) { + // We have figured out the *best* register to spill. But there are other + // registers that are pretty good as well (spill weight within 3%). Spill + // the one that has fewest defs and uses that conflict with cur. + float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; + SmallVector<LiveInterval*, 8> SLIs[3]; + + DEBUG({ + dbgs() << "\tConsidering " << NumCands << " candidates: "; + for (unsigned i = 0; i != NumCands; ++i) + dbgs() << tri_->getName(Candidates[i].first) << " "; + dbgs() << "\n"; + }); + + // Calculate the number of conflicts of each candidate. + for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { + unsigned Reg = i->first->reg; + unsigned PhysReg = vrm_->getPhys(Reg); + if (!cur->overlapsFrom(*i->first, i->second)) + continue; + for (unsigned j = 0; j < NumCands; ++j) { + unsigned Candidate = Candidates[j].first; + if (tri_->regsOverlap(PhysReg, Candidate)) { + if (NumCands > 1) + Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); + SLIs[j].push_back(i->first); + } + } + } + + for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ + unsigned Reg = i->first->reg; + unsigned PhysReg = vrm_->getPhys(Reg); + if (!cur->overlapsFrom(*i->first, i->second-1)) + continue; + for (unsigned j = 0; j < NumCands; ++j) { + unsigned Candidate = Candidates[j].first; + if (tri_->regsOverlap(PhysReg, Candidate)) { + if (NumCands > 1) + Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); + SLIs[j].push_back(i->first); + } + } + } + + // Which is the best candidate? + unsigned BestCandidate = 0; + float MinConflicts = Conflicts[0]; + for (unsigned i = 1; i != NumCands; ++i) { + if (Conflicts[i] < MinConflicts) { + BestCandidate = i; + MinConflicts = Conflicts[i]; + } + } + + std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), + std::back_inserter(SpillIntervals)); +} + +namespace { + struct WeightCompare { + private: + const RALinScan &Allocator; + + public: + WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} + + typedef std::pair<unsigned, float> RegWeightPair; + bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { + return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); + } + }; +} + +static bool weightsAreClose(float w1, float w2) { + if (!NewHeuristic) + return false; + + float diff = w1 - w2; + if (diff <= 0.02f) // Within 0.02f + return true; + return (diff / w2) <= 0.05f; // Within 5%. 
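+ // (Worked example for the two tests above: w1 = 1.03, w2 = 1.00 gives
+ // diff = 0.03, failing the absolute 0.02f test but passing the relative
+ // test at 3% <= 5%, so the weights are considered close.)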
+} + +LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { + DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg); + if (I == NextReloadMap.end()) + return 0; + return &li_->getInterval(I->second); +} + +void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { + bool isNew = DowngradedRegs.insert(Reg); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, Reg)); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) { + isNew = DowngradedRegs.insert(*AS); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, *AS)); + } + ++NumDowngrade; +} + +void RALinScan::UpgradeRegister(unsigned Reg) { + if (Reg) { + DowngradedRegs.erase(Reg); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) + DowngradedRegs.erase(*AS); + } +} + +namespace { + struct LISorter { + bool operator()(LiveInterval* A, LiveInterval* B) { + return A->beginIndex() < B->beginIndex(); + } + }; +} + +/// assignRegOrStackSlotAtInterval - assign a register if one is available, or +/// spill. +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { + DEBUG(dbgs() << "\tallocating current interval: "); + + // This is an implicitly defined live interval, just assign any register. + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + if (cur->empty()) { + unsigned physReg = vrm_->getRegAllocPref(cur->reg); + if (!physReg) + physReg = getFirstNonReservedPhysReg(RC); + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); + // Note the register is not really in use. + vrm_->assignVirt2Phys(cur->reg, physReg); + return; + } + + backUpRegUses(); + + std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; + SlotIndex StartPosition = cur->beginIndex(); + const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); + + // If start of this live interval is defined by a move instruction and its + // source is assigned a physical register that is compatible with the target + // register class, then we should try to assign it the same register. + // This can happen when the move is from a larger register class to a smaller + // one, e.g. X86::mov32to32_. These move instructions are not coalescable. + if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { + VNInfo *vni = cur->begin()->valno; + if ((vni->def != SlotIndex()) && !vni->isUnused() && + vni->isDefAccurate()) { + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + if (CopyMI && CopyMI->isCopy()) { + unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); + unsigned SrcReg = CopyMI->getOperand(1).getReg(); + unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + mri_->setRegAllocationHint(cur->reg, 0, Reg); + } + } + } + } + + // For every interval in inactive we overlap with, mark the + // register as not free and update spill weights. 
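+ // (Expository note: an inactive interval has a lifetime hole at the current
+ // point, but where it overlaps cur later its physreg must not be handed
+ // out, so it is temporarily marked in use below.)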
+  for (IntervalPtrs::const_iterator i = inactive_.begin(),
+         e = inactive_.end(); i != e; ++i) {
+    unsigned Reg = i->first->reg;
+    assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+           "Can only allocate virtual registers!");
+    const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+    // If this is not in a related reg class to the register we're allocating,
+    // don't check it.
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+        cur->overlapsFrom(*i->first, i->second-1)) {
+      Reg = vrm_->getPhys(Reg);
+      addRegUse(Reg);
+      SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+    }
+  }
+
+  // Speculatively check to see if we can get a register right now. If not,
+  // adding more constraints won't make one available, so we are done. If so,
+  // we can check to see if it is valid. Doing an exhaustive search of the
+  // fixed_ list is very bad (it contains all callee clobbered registers for
+  // any function with a call), so we want to avoid doing that if possible.
+  unsigned physReg = getFreePhysReg(cur);
+  unsigned BestPhysReg = physReg;
+  if (physReg) {
+    // We got a register. However, if it's in the fixed_ list, we might
+    // conflict with it. Check to see if we conflict with it or any of its
+    // aliases.
+    SmallSet<unsigned, 8> RegAliases;
+    for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+      RegAliases.insert(*AS);
+
+    bool ConflictsWithFixed = false;
+    for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+      IntervalPtr &IP = fixed_[i];
+      if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+        // Okay, this reg is on the fixed list. Check to see if we actually
+        // conflict.
+        LiveInterval *I = IP.first;
+        if (I->endIndex() > StartPosition) {
+          LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+          IP.second = II;
+          if (II != I->begin() && II->start > StartPosition)
+            --II;
+          if (cur->overlapsFrom(*I, II)) {
+            ConflictsWithFixed = true;
+            break;
+          }
+        }
+      }
+    }
+
+    // Okay, the register picked by our speculative getFreePhysReg call turned
+    // out to be in use. Actually add all of the conflicting fixed registers to
+    // regUse_ so we can do an accurate query.
+    if (ConflictsWithFixed) {
+      // For every interval in fixed we overlap with, mark the register as not
+      // free and update spill weights.
+      for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+        IntervalPtr &IP = fixed_[i];
+        LiveInterval *I = IP.first;
+
+        const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+        if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+            I->endIndex() > StartPosition) {
+          LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+          IP.second = II;
+          if (II != I->begin() && II->start > StartPosition)
+            --II;
+          if (cur->overlapsFrom(*I, II)) {
+            unsigned reg = I->reg;
+            addRegUse(reg);
+            SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+          }
+        }
+      }
+
+      // Using the newly updated regUse_ object, which includes conflicts in
+      // the future, see if there are any registers available.
+      physReg = getFreePhysReg(cur);
+    }
+  }
+
+  // Restore the physical register tracker, removing information about the
+  // future.
+  restoreRegUses();
+
+  // If we find a free register, we are done: assign this virtual to
+  // the free physical register and add this interval to the active
+  // list.
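+  // E.g. (hypothetical registers): for cur == %v5 with EAX free, the block
+  // below records the assignment %v5 -> EAX in the VirtRegMap, marks EAX as
+  // used in regUse_, and appends %v5 to both active_ and handled_.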
+  if (physReg) {
+    DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+    vrm_->assignVirt2Phys(cur->reg, physReg);
+    addRegUse(physReg);
+    active_.push_back(std::make_pair(cur, cur->begin()));
+    handled_.push_back(cur);
+
+    // "Upgrade" the physical register since it has been allocated.
+    UpgradeRegister(physReg);
+    if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+      // "Downgrade" physReg to try to keep physReg from being allocated until
+      // the next reload from the same SS is allocated.
+      mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
+      DowngradeRegister(cur, physReg);
+    }
+    return;
+  }
+  DEBUG(dbgs() << "no free registers\n");
+
+  // Compile the spill weights into an array that is better for scanning.
+  std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+  for (std::vector<std::pair<unsigned, float> >::iterator
+       I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+    updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+  // For each interval in active, update spill weights.
+  for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+    reg = vrm_->getPhys(reg);
+    updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+  }
+
+  DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+
+  // Find a register to spill.
+  float minWeight = HUGE_VALF;
+  unsigned minReg = 0;
+
+  bool Found = false;
+  std::vector<std::pair<unsigned,float> > RegsWeights;
+  if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      float regWeight = SpillWeights[reg];
+      // Don't even consider reserved regs.
+      if (reservedRegs_.test(reg))
+        continue;
+      // Skip recently allocated registers.
+      if (minWeight > regWeight && !isRecentlyUsed(reg))
+        Found = true;
+      RegsWeights.push_back(std::make_pair(reg, regWeight));
+    }
+
+  // If we didn't find a spillable register, try the aliases.
+  if (!Found) {
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      if (reservedRegs_.test(reg))
+        continue;
+      // No need to worry about whether the alias register is smaller than the
+      // registers of RC; we are going to spill all registers that alias it
+      // anyway.
+      for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+        RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+    }
+  }
+
+  // Sort all potential spill candidates by weight.
+  std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
+  minReg = RegsWeights[0].first;
+  minWeight = RegsWeights[0].second;
+  if (minWeight == HUGE_VALF) {
+    // All registers must have inf weight. Just grab one!
+    minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
+    if (cur->weight == HUGE_VALF ||
+        li_->getApproximateInstructionCount(*cur) == 0) {
+      // Spill a physical register around defs and uses.
+      if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+        // spillPhysRegAroundRegDefsUses may have invalidated the iterators
+        // stored in fixed_. Reset them.
+        for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+          IntervalPtr &IP = fixed_[i];
+          LiveInterval *I = IP.first;
+          if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+            IP.second = I->advanceTo(I->begin(), StartPosition);
+        }
+
+        DowngradedRegs.clear();
+        assignRegOrStackSlotAtInterval(cur);
+      } else {
+        assert(false && "Ran out of registers during register allocation!");
+        report_fatal_error("Ran out of registers during register allocation!");
+      }
+      return;
+    }
+  }
+
+  // Find up to 3 registers to consider as spill candidates.
+  unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+  while (LastCandidate > 1) {
+    if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+      break;
+    --LastCandidate;
+  }
+
+  DEBUG({
+      dbgs() << "\t\tregister(s) with min weight(s): ";
+
+      for (unsigned i = 0; i != LastCandidate; ++i)
+        dbgs() << tri_->getName(RegsWeights[i].first)
+               << " (" << RegsWeights[i].second << ")\n";
+    });
+
+  // If the current interval has the minimum weight, we need to spill it,
+  // add any added intervals back to unhandled, and restart linear scan.
+  if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+    DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
+    SmallVector<LiveInterval*, 8> spillIs, added;
+    spiller_->spill(cur, added, spillIs);
+
+    std::sort(added.begin(), added.end(), LISorter());
+    addStackInterval(cur, ls_, li_, mri_, *vrm_);
+    if (added.empty())
+      return;  // Early exit if all spills were folded.
+
+    // Merge added with unhandled. Note that we have already sorted
+    // intervals returned by addIntervalsForSpills by their starting
+    // point.
+    // This also updates the NextReloadMap. That is, it adds a mapping from a
+    // register defined by a reload from SS to the next reload from SS in the
+    // same basic block.
+    MachineBasicBlock *LastReloadMBB = 0;
+    LiveInterval *LastReload = 0;
+    int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+    for (unsigned i = 0, e = added.size(); i != e; ++i) {
+      LiveInterval *ReloadLi = added[i];
+      if (ReloadLi->weight == HUGE_VALF &&
+          li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+        SlotIndex ReloadIdx = ReloadLi->beginIndex();
+        MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+        int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+        if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+          // Last reload of same SS is in the same MBB. We want to try to
+          // allocate both reloads the same register and make sure the reg
+          // isn't clobbered in between if at all possible.
+          assert(LastReload->beginIndex() < ReloadIdx);
+          NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+        }
+        LastReloadMBB = ReloadMBB;
+        LastReload = ReloadLi;
+        LastReloadSS = ReloadSS;
+      }
+      unhandled_.push(ReloadLi);
+    }
+    return;
+  }
+
+  ++NumBacktracks;
+
+  // Push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it, it
+  // should go right back to the front of the list.
+  unhandled_.push(cur);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+         "did not choose a register to spill?");
+
+  // We spill all intervals aliasing the register with
+  // minimum weight, roll back to the interval with the earliest
+  // start point and let the linear scan algorithm run again.
+  SmallVector<LiveInterval*, 8> spillIs;
+
+  // Determine which intervals have to be spilled.
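+  // Sketch with hypothetical weights: if RegsWeights is {EAX: 4.0, ECX: 4.1,
+  // EDX: 9.0} and the new heuristic accepted ECX as close to EAX (within
+  // 0.02 absolute or 5% relative), both are passed as candidates below, and
+  // the cheaper one is chosen by summing 10^loopdepth over each candidate's
+  // conflicting defs and uses.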
+  findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+  // Set of spilled vregs (used later to roll back properly)
+  SmallSet<unsigned, 8> spilled;
+
+  // The earliest start of a spilled interval indicates up to where
+  // in handled_ we need to roll back.
+  assert(!spillIs.empty() && "No spill intervals?");
+  SlotIndex earliestStart = spillIs[0]->beginIndex();
+
+  // Spill live intervals of virtual regs mapped to the physical register we
+  // want to clear (and its aliases). We only spill those that overlap with the
+  // current interval as the rest do not affect its allocation. We also keep
+  // track of the earliest start of all spilled live intervals since this will
+  // mark our rollback point.
+  SmallVector<LiveInterval*, 8> added;
+  while (!spillIs.empty()) {
+    LiveInterval *sli = spillIs.back();
+    spillIs.pop_back();
+    DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
+    if (sli->beginIndex() < earliestStart)
+      earliestStart = sli->beginIndex();
+    spiller_->spill(sli, added, spillIs);
+    addStackInterval(sli, ls_, li_, mri_, *vrm_);
+    spilled.insert(sli->reg);
+  }
+
+  // Include any added intervals in earliestStart.
+  for (unsigned i = 0, e = added.size(); i != e; ++i) {
+    SlotIndex SI = added[i]->beginIndex();
+    if (SI < earliestStart)
+      earliestStart = SI;
+  }
+
+  DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
+
+  // Scan handled_ in reverse order up to the earliest start of a
+  // spilled live interval and undo each one, restoring the state of
+  // unhandled_.
+  while (!handled_.empty()) {
+    LiveInterval* i = handled_.back();
+    // If this interval starts before earliestStart we are done.
+    if (!i->empty() && i->beginIndex() < earliestStart)
+      break;
+    DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
+    handled_.pop_back();
+
+    // When undoing a live interval allocation we must know if it is active or
+    // inactive to properly update regUse_ and the VirtRegMap.
+    IntervalPtrs::iterator it;
+    if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+      active_.erase(it);
+      assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      delRegUse(vrm_->getPhys(i->reg));
+      vrm_->clearVirt(i->reg);
+    } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+      inactive_.erase(it);
+      assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      vrm_->clearVirt(i->reg);
+    } else {
+      assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+             "Can only allocate virtual registers!");
+      vrm_->clearVirt(i->reg);
+      unhandled_.push(i);
+    }
+
+    DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+    if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy. Clear
+      // the preference now since the source interval allocation may have been
+      // undone as well.
+      mri_->setRegAllocationHint(i->reg, 0, 0);
+    else {
+      UpgradeRegister(ii->second);
+    }
+  }
+
+  // Rewind the iterators in the active, inactive, and fixed lists back to the
+  // point we reverted to.
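+  // For example (hypothetical indices): if the earliest spilled interval
+  // starts at index 100, every interval in handled_ beginning at or after
+  // 100 has been unassigned above, so the cached iterators below must be
+  // moved back to positions at or before index 100.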
+  RevertVectorIteratorsTo(active_, earliestStart);
+  RevertVectorIteratorsTo(inactive_, earliestStart);
+  RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after earliestStart
+  // and insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required).
+  for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+    LiveInterval *HI = handled_[i];
+    if (!HI->expiredAt(earliestStart) &&
+        HI->expiredAt(cur->beginIndex())) {
+      DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
+      active_.push_back(std::make_pair(HI, HI->begin()));
+      assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+      addRegUse(vrm_->getPhys(HI->reg));
+    }
+  }
+
+  // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds a mapping from a
+  // register defined by a reload from SS to the next reload from SS in the
+  // same basic block.
+  MachineBasicBlock *LastReloadMBB = 0;
+  LiveInterval *LastReload = 0;
+  int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+  std::sort(added.begin(), added.end(), LISorter());
+  for (unsigned i = 0, e = added.size(); i != e; ++i) {
+    LiveInterval *ReloadLi = added[i];
+    if (ReloadLi->weight == HUGE_VALF &&
+        li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+      SlotIndex ReloadIdx = ReloadLi->beginIndex();
+      MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+      int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+      if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+        // Last reload of same SS is in the same MBB. We want to try to
+        // allocate both reloads the same register and make sure the reg
+        // isn't clobbered in between if at all possible.
+        assert(LastReload->beginIndex() < ReloadIdx);
+        NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+      }
+      LastReloadMBB = ReloadMBB;
+      LastReload = ReloadLi;
+      LastReloadSS = ReloadSS;
+    }
+    unhandled_.push(ReloadLi);
+  }
+}
+
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+                                   const TargetRegisterClass *RC,
+                                   unsigned MaxInactiveCount,
+                                   SmallVector<unsigned, 256> &inactiveCounts,
+                                   bool SkipDGRegs) {
+  unsigned FreeReg = 0;
+  unsigned FreeRegInactiveCount = 0;
+
+  std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+  // Resolve the second part of the hint (if possible) given the current
+  // allocation.
+  unsigned physReg = Hint.second;
+  if (physReg &&
+      TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+    physReg = vrm_->getPhys(physReg);
+
+  TargetRegisterClass::iterator I, E;
+  tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
+  assert(I != E && "No allocatable register in this register class!");
+
+  // Scan for the first available register.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    // Ignore "downgraded" registers.
+    if (SkipDGRegs && DowngradedRegs.count(Reg))
+      continue;
+    // Skip reserved registers.
+    if (reservedRegs_.test(Reg))
+      continue;
+    // Skip recently allocated registers.
+    if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
+      FreeReg = Reg;
+      if (FreeReg < inactiveCounts.size())
+        FreeRegInactiveCount = inactiveCounts[FreeReg];
+      else
+        FreeRegInactiveCount = 0;
+      break;
+    }
+  }
+
+  // If there are no free regs, or if this reg has the max inactive count,
+  // return this register.
+  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+    // Remember what register we picked so we can skip it next time.
+    if (FreeReg != 0) recordRecentlyUsed(FreeReg);
+    return FreeReg;
+  }
+
+  // Continue scanning the registers, looking for the one with the highest
+  // inactive count. Alkis found that this reduced register pressure very
+  // slightly on X86 (in rev 1.94 of this file), though this should probably be
+  // reevaluated now.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    // Ignore "downgraded" registers.
+    if (SkipDGRegs && DowngradedRegs.count(Reg))
+      continue;
+    // Skip reserved registers.
+    if (reservedRegs_.test(Reg))
+      continue;
+    if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+        FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
+      FreeReg = Reg;
+      FreeRegInactiveCount = inactiveCounts[Reg];
+      if (FreeRegInactiveCount == MaxInactiveCount)
+        break;    // We found the one with the max inactive count.
+    }
+  }
+
+  // Remember what register we picked so we can skip it next time.
+  recordRecentlyUsed(FreeReg);
+
+  return FreeReg;
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+  SmallVector<unsigned, 256> inactiveCounts;
+  unsigned MaxInactiveCount = 0;
+
+  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+  for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+
+    // If this is not in a related reg class to the register we're allocating,
+    // don't check it.
+    const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+      reg = vrm_->getPhys(reg);
+      if (inactiveCounts.size() <= reg)
+        inactiveCounts.resize(reg+1);
+      ++inactiveCounts[reg];
+      MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+    }
+  }
+
+  // If the copy coalescer has assigned a "preferred" register, check if it's
+  // available first.
+  unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+  if (Preference) {
+    DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
+    if (isRegAvail(Preference) &&
+        RC->contains(Preference))
+      return Preference;
+  }
+
+  if (!DowngradedRegs.empty()) {
+    unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+                                      true);
+    if (FreeReg)
+      return FreeReg;
+  }
+  return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+  return new RALinScan();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..61f337b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,942 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+//   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+//   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+//   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+//   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+//   architectures. In Proceedings of the Joint Conference on Languages,
+//   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+//   NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "PBQP/HeuristicSolver.h"
+#include "PBQP/Graph.h"
+#include "PBQP/Heuristics/Briggs.h"
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+                     llvm::createPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+               cl::desc("Attempt coalescing during PBQP register allocation."),
+               cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+                 cl::desc("Pre-split before PBQP register allocation."),
+                 cl::init(false), cl::Hidden);
+
+namespace {
+
+  ///
+  /// PBQP based allocators solve the register allocation problem by mapping
+  /// register allocation problems to Partitioned Boolean Quadratic
+  /// Programming problems.
+  class PBQPRegAlloc : public MachineFunctionPass {
+  public:
+
+    static char ID;
+
+    /// Construct a PBQP register allocator.
+    PBQPRegAlloc() : MachineFunctionPass(ID) {}
+
+    /// Return the pass name.
+    virtual const char* getPassName() const {
+      return "PBQP Register Allocator";
+    }
+
+    /// PBQP analysis usage.
+    virtual void getAnalysisUsage(AnalysisUsage &au) const {
+      au.addRequired<SlotIndexes>();
+      au.addPreserved<SlotIndexes>();
+      au.addRequired<LiveIntervals>();
+      //au.addRequiredID(SplitCriticalEdgesID);
+      au.addRequired<RegisterCoalescer>();
+      au.addRequired<CalculateSpillWeights>();
+      au.addRequired<LiveStacks>();
+      au.addPreserved<LiveStacks>();
+      au.addRequired<MachineLoopInfo>();
+      au.addPreserved<MachineLoopInfo>();
+      if (pbqpPreSplitting)
+        au.addRequired<LoopSplitter>();
+      au.addRequired<VirtRegMap>();
+      au.addRequired<RenderMachineFunction>();
+      MachineFunctionPass::getAnalysisUsage(au);
+    }
+
+    /// Perform register allocation
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+
+    class LIOrdering {
+    public:
+      bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+        return li1->reg < li2->reg;
+      }
+    };
+
+    typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
+    typedef std::vector<const LiveInterval*> Node2LIMap;
+    typedef std::vector<unsigned> AllowedSet;
+    typedef std::vector<AllowedSet> AllowedSetMap;
+    typedef std::set<unsigned> RegSet;
+    typedef std::pair<unsigned, unsigned> RegPair;
+    typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+
+    typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
+
+    typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+
+    MachineFunction *mf;
+    const TargetMachine *tm;
+    const TargetRegisterInfo *tri;
+    const TargetInstrInfo *tii;
+    const MachineLoopInfo *loopInfo;
+    MachineRegisterInfo *mri;
+    RenderMachineFunction *rmf;
+
+    LiveIntervals *lis;
+    LiveStacks *lss;
+    VirtRegMap *vrm;
+
+    LI2NodeMap li2Node;
+    Node2LIMap node2LI;
+    AllowedSetMap allowedSets;
+    LiveIntervalSet vregIntervalsToAlloc,
+                    emptyVRegIntervals;
+    NodeVector problemNodes;
+
+
+    /// Builds a PBQP cost vector.
+    template <typename RegContainer>
+    PBQP::Vector buildCostVector(unsigned vReg,
+                                 const RegContainer &allowed,
+                                 const CoalesceMap &coalesces,
+                                 PBQP::PBQPNum spillCost) const;
+
+    /// \brief Builds a PBQP interference matrix.
+    ///
+    /// @return Either a pointer to a non-zero PBQP matrix representing the
+    ///         allocation option costs, or a null pointer for a zero matrix.
+    ///
+    /// Expects allowed sets for two interfering LiveIntervals. These allowed
+    /// sets should contain only allocable registers from the LiveInterval's
+    /// register class, with any interfering pre-colored registers removed.
+    template <typename RegContainer>
+    PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
+                                          const RegContainer &allowed2) const;
+
+    /// \brief Builds a PBQP coalescing matrix.
+    ///
+    /// Expects allowed sets for two potentially coalescable LiveIntervals,
+    /// and an estimated benefit due to coalescing. The allowed sets should
+    /// contain only allocable registers from the LiveInterval's register
+    /// classes, with any interfering pre-colored registers removed.
+    template <typename RegContainer>
+    PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
+                                        const RegContainer &allowed2,
+                                        PBQP::PBQPNum cBenefit) const;
+
+    /// \brief Finds coalescing opportunities and returns them as a map.
+    ///
+    /// Any entries in the map are guaranteed coalescable, even if their
+    /// corresponding live intervals overlap.
+    CoalesceMap findCoalesces();
+
+    /// \brief Finds the initial set of vreg intervals to allocate.
+    void findVRegIntervalsToAlloc();
+
+    /// \brief Constructs a PBQP problem representation of the register
+    /// allocation problem for this function.
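+    /// A tiny instance (hypothetical): two interfering vregs %a and %b, each
+    /// with allowed set {R0, R1}, get cost vectors of length 3 (index 0 is
+    /// the spill option) and share a 3x3 edge matrix whose entries for
+    /// (R0,R0) and (R1,R1) are infinite, so the solver must either spill one
+    /// interval or assign distinct registers.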
+    ///
+    /// @return a PBQP graph representing the register allocation problem.
+    PBQP::Graph constructPBQPProblem();
+
+    /// \brief Adds a stack interval if the given live interval has been
+    /// spilled. Used to support stack slot coloring.
+    void addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri);
+
+    /// \brief Given a solved PBQP problem, maps the solution back to a
+    /// register assignment.
+    bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
+
+    /// \brief Postprocessing before final spilling. Sets basic block "live in"
+    /// variables.
+    void finalizeAlloc() const;
+
+  };
+
+  char PBQPRegAlloc::ID = 0;
+}
+
+
+template <typename RegContainer>
+PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
+                                           const RegContainer &allowed,
+                                           const CoalesceMap &coalesces,
+                                           PBQP::PBQPNum spillCost) const {
+
+  typedef typename RegContainer::const_iterator AllowedItr;
+
+  // Allocate the vector. The additional (0th) element is used for the spill
+  // option.
+  PBQP::Vector v(allowed.size() + 1, 0);
+
+  v[0] = spillCost;
+
+  // Iterate over the allowed registers inserting coalesce benefits if there
+  // are any.
+  unsigned ai = 0;
+  for (AllowedItr itr = allowed.begin(), end = allowed.end();
+       itr != end; ++itr, ++ai) {
+
+    unsigned pReg = *itr;
+
+    CoalesceMap::const_iterator cmItr =
+      coalesces.find(RegPair(vReg, pReg));
+
+    // No coalesce - on to the next preg.
+    if (cmItr == coalesces.end())
+      continue;
+
+    // We have a coalesce - insert the benefit.
+    v[ai + 1] = -cmItr->second;
+  }
+
+  return v;
+}
+
+template <typename RegContainer>
+PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
+      const RegContainer &allowed1, const RegContainer &allowed2) const {
+
+  typedef typename RegContainer::const_iterator RegContainerIterator;
+
+  // Construct a PBQP matrix representing the cost of allocation options. The
+  // rows and columns correspond to the allocation options for the two live
+  // intervals. Elements will be infinite where corresponding registers alias,
+  // since we cannot allocate aliasing registers to interfering live intervals.
+  // All other elements (non-aliasing combinations) will have zero cost. Note
+  // that the spill option (element 0,0) has zero cost, since we can allocate
+  // both intervals to memory safely (the cost for each individual allocation
+  // to memory is accounted for by the cost vectors for each live interval).
+  PBQP::Matrix *m =
+    new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
+
+  // Assume this is a zero matrix until proven otherwise. Zero matrices occur
+  // between interfering live ranges with non-overlapping register sets (e.g.
+  // non-overlapping reg classes, or disjoint sets of allowed regs within the
+  // same class). The term "overlapping" is used advisedly: sets which do not
+  // intersect, but contain registers which alias, will have non-zero matrices.
+  // We optimize zero matrices away to improve solver speed.
+  bool isZeroMatrix = true;
+
+
+  // Row index. Starts at 1, since the 0th row is for the spill option, which
+  // is always zero.
+  unsigned ri = 1;
+
+  // Iterate over allowed sets, insert infinities where required.
+  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+       a1Itr != a1End; ++a1Itr) {
+
+    // Column index, starts at 1 as for row index.
+    unsigned ci = 1;
+    unsigned reg1 = *a1Itr;
+
+    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+         a2Itr != a2End; ++a2Itr) {
+
+      unsigned reg2 = *a2Itr;
+
+      // If the row/column regs are identical or alias insert an infinity.
+      if (tri->regsOverlap(reg1, reg2)) {
+        (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+        isZeroMatrix = false;
+      }
+
+      ++ci;
+    }
+
+    ++ri;
+  }
+
+  // If this turns out to be a zero matrix...
+  if (isZeroMatrix) {
+    // ...free it and return null.
+    delete m;
+    return 0;
+  }
+
+  // ...otherwise return the cost matrix.
+  return m;
+}
+
+template <typename RegContainer>
+PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
+      const RegContainer &allowed1, const RegContainer &allowed2,
+      PBQP::PBQPNum cBenefit) const {
+
+  typedef typename RegContainer::const_iterator RegContainerIterator;
+
+  // Construct a PBQP Matrix representing the benefits of coalescing. As with
+  // interference matrices the rows and columns represent allowed registers
+  // for the LiveIntervals which are (potentially) to be coalesced. The amount
+  // -cBenefit will be placed in any element representing the same register
+  // for both intervals.
+  PBQP::Matrix *m =
+    new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
+
+  // Reset costs to zero.
+  m->reset(0);
+
+  // Assume the matrix is zero till proven otherwise. Zero matrices will be
+  // optimized away as in the interference case.
+  bool isZeroMatrix = true;
+
+  // Row index. Starts at 1, since the 0th row is for the spill option, which
+  // is always zero.
+  unsigned ri = 1;
+
+  // Iterate over the allowed sets, insert coalescing benefits where
+  // appropriate.
+  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+       a1Itr != a1End; ++a1Itr) {
+
+    // Column index, starts at 1 as for row index.
+    unsigned ci = 1;
+    unsigned reg1 = *a1Itr;
+
+    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+         a2Itr != a2End; ++a2Itr) {
+
+      // If the row and column represent the same register insert a beneficial
+      // cost to prefer this allocation - it would allow us to eliminate a
+      // move instruction.
+      if (reg1 == *a2Itr) {
+        (*m)[ri][ci] = -cBenefit;
+        isZeroMatrix = false;
+      }
+
+      ++ci;
+    }
+
+    ++ri;
+  }
+
+  // If this turns out to be a zero matrix...
+  if (isZeroMatrix) {
+    // ...free it and return null.
+    delete m;
+    return 0;
+  }
+
+  return m;
+}
+
+PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
+
+  typedef MachineFunction::const_iterator MFIterator;
+  typedef MachineBasicBlock::const_iterator MBBIterator;
+  typedef LiveInterval::const_vni_iterator VNIIterator;
+
+  CoalesceMap coalescesFound;
+
+  // To find coalesces we need to iterate over the function looking for
+  // copy instructions.
+  for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
+       bbItr != bbEnd; ++bbItr) {
+
+    const MachineBasicBlock *mbb = &*bbItr;
+
+    for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
+         iItr != iEnd; ++iItr) {
+
+      const MachineInstr *instr = &*iItr;
+
+      // If this isn't a copy then continue to the next instruction.
+      if (!instr->isCopy())
+        continue;
+
+      unsigned srcReg = instr->getOperand(1).getReg();
+      unsigned dstReg = instr->getOperand(0).getReg();
+
+      // If the registers are already the same our job is nice and easy.
+      if (dstReg == srcReg)
+        continue;
+
+      bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
+           dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+
+      // If both registers are physical then we can't coalesce.
+      if (srcRegIsPhysical && dstRegIsPhysical)
+        continue;
+
+      // If it's a copy that includes two virtual registers but the source and
+      // destination classes differ then we can't coalesce.
+      if (!srcRegIsPhysical && !dstRegIsPhysical &&
+          mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
+        continue;
+
+      // If one is physical and one is virtual, check that the physical is
+      // allocatable in the class of the virtual.
+      if (srcRegIsPhysical && !dstRegIsPhysical) {
+        const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
+        if (std::find(dstRegClass->allocation_order_begin(*mf),
+                      dstRegClass->allocation_order_end(*mf), srcReg) ==
+            dstRegClass->allocation_order_end(*mf))
+          continue;
+      }
+      if (!srcRegIsPhysical && dstRegIsPhysical) {
+        const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
+        if (std::find(srcRegClass->allocation_order_begin(*mf),
+                      srcRegClass->allocation_order_end(*mf), dstReg) ==
+            srcRegClass->allocation_order_end(*mf))
+          continue;
+      }
+
+      // If we've made it here we have a copy with compatible register classes.
+      // We can probably coalesce, but we need to consider overlap.
+      const LiveInterval *srcLI = &lis->getInterval(srcReg),
+                         *dstLI = &lis->getInterval(dstReg);
+
+      if (srcLI->overlaps(*dstLI)) {
+        // Even in the case of an overlap we might still be able to coalesce,
+        // but we need to make sure that no definition of either range occurs
+        // while the other range is live.
+
+        // Start by assuming we're ok.
+        bool badDef = false;
+
+        // Test all defs of the source range.
+        for (VNIIterator
+               vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
+             vniItr != vniEnd; ++vniItr) {
+
+          // If we find a poorly defined def we err on the side of caution.
+          if (!(*vniItr)->def.isValid()) {
+            badDef = true;
+            break;
+          }
+
+          // If we find a def that kills the coalescing opportunity then
+          // record it and break from the loop.
+          if (dstLI->liveAt((*vniItr)->def)) {
+            badDef = true;
+            break;
+          }
+        }
+
+        // If we have a bad def give up, continue to the next instruction.
+        if (badDef)
+          continue;
+
+        // Otherwise test definitions of the destination range.
+        for (VNIIterator
+               vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
+             vniItr != vniEnd; ++vniItr) {
+
+          // We want to make sure we skip the copy instruction itself.
+          if ((*vniItr)->getCopy() == instr)
+            continue;
+
+          if (!(*vniItr)->def.isValid()) {
+            badDef = true;
+            break;
+          }
+
+          if (srcLI->liveAt((*vniItr)->def)) {
+            badDef = true;
+            break;
+          }
+        }
+
+        // As before, if we find a bad def we give up and continue to the next
+        // instruction.
+        if (badDef)
+          continue;
+      }
+
+      // If we make it to here then either the ranges didn't overlap, or they
+      // did, but none of their definitions would prevent us from coalescing.
+      // We're good to go with the coalesce.
+
+      float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
+
+      coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
+      coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
+    }
+
+  }
+
+  return coalescesFound;
+}
+
+void PBQPRegAlloc::findVRegIntervalsToAlloc() {
+
+  // Iterate over all live ranges.
+  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+
+    // Ignore physical ones.
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+      continue;
+
+    LiveInterval *li = itr->second;
+
+    // If this live interval is non-empty we will use pbqp to allocate it.
+    // Empty intervals we allocate in a simple post-processing stage in
+    // finalizeAlloc.
+    if (!li->empty()) {
+      vregIntervalsToAlloc.insert(li);
+    }
+    else {
+      emptyVRegIntervals.insert(li);
+    }
+  }
+}
+
+PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
+
+  typedef std::vector<const LiveInterval*> LIVector;
+  typedef std::vector<unsigned> RegVector;
+
+  // This will store the physical intervals for easy reference.
+  LIVector physIntervals;
+
+  // Start by clearing the old node <-> live interval mappings & allowed sets
+  li2Node.clear();
+  node2LI.clear();
+  allowedSets.clear();
+
+  // Populate physIntervals, update preg use:
+  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+      physIntervals.push_back(itr->second);
+      mri->setPhysRegUsed(itr->second->reg);
+    }
+  }
+
+  // Iterate over vreg intervals, construct live interval <-> node number
+  // mappings.
+  for (LiveIntervalSet::const_iterator
+         itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
+       itr != end; ++itr) {
+    const LiveInterval *li = *itr;
+
+    li2Node[li] = node2LI.size();
+    node2LI.push_back(li);
+  }
+
+  // Get the set of potential coalesces.
+  CoalesceMap coalesces;
+
+  if (pbqpCoalescing) {
+    coalesces = findCoalesces();
+  }
+
+  // Construct the PBQP problem graph.
+  PBQP::Graph problem;
+  problemNodes.resize(vregIntervalsToAlloc.size());
+
+  // Resize allowedSets container appropriately.
+  allowedSets.resize(vregIntervalsToAlloc.size());
+
+  BitVector ReservedRegs = tri->getReservedRegs(*mf);
+
+  // Iterate over virtual register intervals to compute allowed sets...
+  for (unsigned node = 0; node < node2LI.size(); ++node) {
+
+    // Grab pointers to the interval and its register class.
+    const LiveInterval *li = node2LI[node];
+    const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+
+    // Start by assuming all allocable registers in the class are allowed...
+    RegVector liAllowed;
+    TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
+    TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
+    for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
+      if (!ReservedRegs.test(*it))
+        liAllowed.push_back(*it);
+
+    // Eliminate the physical registers which overlap with this range, along
+    // with all their aliases.
+    for (LIVector::iterator pItr = physIntervals.begin(),
+         pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+
+      if (!li->overlaps(**pItr))
+        continue;
+
+      unsigned pReg = (*pItr)->reg;
+
+      // If we get here then the live intervals overlap, but we're still ok
+      // if they're coalescable.
+      if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
+        continue;
+
+      // If we get here then we have a genuine exclusion.
+
+      // Remove the overlapping reg...
+      RegVector::iterator eraseItr =
+        std::find(liAllowed.begin(), liAllowed.end(), pReg);
+
+      if (eraseItr != liAllowed.end())
+        liAllowed.erase(eraseItr);
+
+      const unsigned *aliasItr = tri->getAliasSet(pReg);
+
+      if (aliasItr != 0) {
+        // ...and its aliases.
+ for (; *aliasItr != 0; ++aliasItr) { + RegVector::iterator eraseItr = + std::find(liAllowed.begin(), liAllowed.end(), *aliasItr); + + if (eraseItr != liAllowed.end()) { + liAllowed.erase(eraseItr); + } + } + } + } + + // Copy the allowed set into a member vector for use when constructing cost + // vectors & matrices, and mapping PBQP solutions back to assignments. + allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end()); + + // Set the spill cost to the interval weight, or epsilon if the + // interval weight is zero + PBQP::PBQPNum spillCost = (li->weight != 0.0) ? + li->weight : std::numeric_limits<PBQP::PBQPNum>::min(); + + // Build a cost vector for this interval. + problemNodes[node] = + problem.addNode( + buildCostVector(li->reg, allowedSets[node], coalesces, spillCost)); + + } + + + // Now add the cost matrices... + for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) { + const LiveInterval *li = node2LI[node1]; + + // Test for live range overlaps and insert interference matrices. + for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) { + const LiveInterval *li2 = node2LI[node2]; + + CoalesceMap::const_iterator cmItr = + coalesces.find(RegPair(li->reg, li2->reg)); + + PBQP::Matrix *m = 0; + + if (cmItr != coalesces.end()) { + m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], + cmItr->second); + } + else if (li->overlaps(*li2)) { + m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]); + } + + if (m != 0) { + problem.addEdge(problemNodes[node1], + problemNodes[node2], + *m); + + delete m; + } + } + } + + assert(problem.getNumNodes() == allowedSets.size()); +/* + std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " + << problem.getNumEdges() << " edges.\n"; + + problem.printDot(std::cerr); +*/ + // We're done, PBQP problem constructed - return it. + return problem; +} + +void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, + MachineRegisterInfo* mri) { + int stackSlot = vrm->getStackSlot(spilled->reg); + + if (stackSlot == VirtRegMap::NO_STACK_SLOT) + return; + + const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); + LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); + + VNInfo *vni; + if (stackInterval.getNumValNums() != 0) + vni = stackInterval.getValNumInfo(0); + else + vni = stackInterval.getNextValue( + SlotIndex(), 0, false, lss->getVNInfoAllocator()); + + LiveInterval &rhsInterval = lis->getInterval(spilled->reg); + stackInterval.MergeRangesInAsValue(rhsInterval, vni); +} + +bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { + + // Set to true if we have any spills + bool anotherRoundNeeded = false; + + // Clear the existing allocation. + vrm->clearAllVirt(); + + // Iterate over the nodes mapping the PBQP solution to a register assignment. + for (unsigned node = 0; node < node2LI.size(); ++node) { + unsigned virtReg = node2LI[node]->reg, + allocSelection = solution.getSelection(problemNodes[node]); + + + // If the PBQP solution is non-zero it's a physical register... + if (allocSelection != 0) { + // Get the physical reg, subtracting 1 to account for the spill option. + unsigned physReg = allowedSets[node][allocSelection - 1]; + + DEBUG(dbgs() << "VREG " << virtReg << " -> " + << tri->getName(physReg) << "\n"); + + assert(physReg != 0); + + // Add to the virt reg map and update the used phys regs. + vrm->assignVirt2Phys(virtReg, physReg); + } + // ...Otherwise it's a spill. 
+ else { + + // Make sure we ignore this virtual reg on the next round + // of allocation + vregIntervalsToAlloc.erase(&lis->getInterval(virtReg)); + + // Insert spill ranges for this live range + const LiveInterval *spillInterval = node2LI[node]; + double oldSpillWeight = spillInterval->weight; + SmallVector<LiveInterval*, 8> spillIs; + rmf->rememberUseDefs(spillInterval); + std::vector<LiveInterval*> newSpills = + lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); + addStackInterval(spillInterval, mri); + rmf->rememberSpills(spillInterval, newSpills); + + (void) oldSpillWeight; + DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: " + << oldSpillWeight << ", New vregs: "); + + // Copy any newly inserted live intervals into the list of regs to + // allocate. + for (std::vector<LiveInterval*>::const_iterator + itr = newSpills.begin(), end = newSpills.end(); + itr != end; ++itr) { + + assert(!(*itr)->empty() && "Empty spill range."); + + DEBUG(dbgs() << (*itr)->reg << " "); + + vregIntervalsToAlloc.insert(*itr); + } + + DEBUG(dbgs() << ")\n"); + + // We need another round if spill intervals were added. + anotherRoundNeeded |= !newSpills.empty(); + } + } + + return !anotherRoundNeeded; +} + +void PBQPRegAlloc::finalizeAlloc() const { + typedef LiveIntervals::iterator LIIterator; + typedef LiveInterval::Ranges::const_iterator LRIterator; + + // First allocate registers for the empty intervals. + for (LiveIntervalSet::const_iterator + itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end(); + itr != end; ++itr) { + LiveInterval *li = *itr; + + unsigned physReg = vrm->getRegAllocPref(li->reg); + + if (physReg == 0) { + const TargetRegisterClass *liRC = mri->getRegClass(li->reg); + physReg = *liRC->allocation_order_begin(*mf); + } + + vrm->assignVirt2Phys(li->reg, physReg); + } + + // Finally iterate over the basic blocks to compute and set the live-in sets. + SmallVector<MachineBasicBlock*, 8> liveInMBBs; + MachineBasicBlock *entryMBB = &*mf->begin(); + + for (LIIterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = liItr->second; + unsigned reg = 0; + + // Get the physical register for this interval + if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { + reg = li->reg; + } + else if (vrm->isAssignedReg(li->reg)) { + reg = vrm->getPhys(li->reg); + } + else { + // Ranges which are assigned a stack slot only are ignored. + continue; + } + + if (reg == 0) { + // Filter out zero regs - they're for intervals that were spilled. + continue; + } + + // Iterate over the ranges of the current interval... + for (LRIterator lrItr = li->begin(), lrEnd = li->end(); + lrItr != lrEnd; ++lrItr) { + + // Find the set of basic blocks which this range is live into... + if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { + // And add the physreg for this interval to their live-in sets. 
+ for (unsigned i = 0; i < liveInMBBs.size(); ++i) { + if (liveInMBBs[i] != entryMBB) { + if (!liveInMBBs[i]->isLiveIn(reg)) { + liveInMBBs[i]->addLiveIn(reg); + } + } + } + liveInMBBs.clear(); + } + } + } + +} + +bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { + + mf = &MF; + tm = &mf->getTarget(); + tri = tm->getRegisterInfo(); + tii = tm->getInstrInfo(); + mri = &mf->getRegInfo(); + + lis = &getAnalysis<LiveIntervals>(); + lss = &getAnalysis<LiveStacks>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + rmf = &getAnalysis<RenderMachineFunction>(); + + vrm = &getAnalysis<VirtRegMap>(); + + + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); + + // Allocator main loop: + // + // * Map current regalloc problem to a PBQP problem + // * Solve the PBQP problem + // * Map the solution back to a register allocation + // * Spill if necessary + // + // This process is continued till no more spills are generated. + + // Find the vreg intervals in need of allocation. + findVRegIntervalsToAlloc(); + + // If there are non-empty intervals allocate them using pbqp. + if (!vregIntervalsToAlloc.empty()) { + + bool pbqpAllocComplete = false; + unsigned round = 0; + + while (!pbqpAllocComplete) { + DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); + + PBQP::Graph problem = constructPBQPProblem(); + PBQP::Solution solution = + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem); + + pbqpAllocComplete = mapPBQPToRegAlloc(solution); + + ++round; + } + } + + // Finalise allocation, allocate empty ranges. + finalizeAlloc(); + + rmf->renderMachineFunction("After PBQP register allocation.", vrm); + + vregIntervalsToAlloc.clear(); + emptyVRegIntervals.clear(); + li2Node.clear(); + node2LI.clear(); + allowedSets.clear(); + problemNodes.clear(); + + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); + + // Run rewriter + std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); + + rewriter->runOnMachineFunction(*mf, *vrm, lis); + + return true; +} + +FunctionPass* llvm::createPBQPRegisterAllocator() { + return new PBQPRegAlloc(); +} + + +#undef DEBUG_TYPE diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp new file mode 100644 index 0000000..02b5539 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -0,0 +1,196 @@ +//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic RegisterCoalescer interface which +// is used as the common interface used by all clients and +// implementations of register coalescing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" + +using namespace llvm; + +// Register the RegisterCoalescer interface, providing a nice name to refer to. 
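+// A client selects "some" coalescer implementation by requiring the analysis
+// group rather than a concrete pass, e.g. (sketch; the PBQP allocator above
+// does exactly this in its getAnalysisUsage):
+//   AU.addRequired<RegisterCoalescer>();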
+static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer"); +char RegisterCoalescer::ID = 0; + +// RegisterCoalescer destructor: DO NOT move this to the header file +// for RegisterCoalescer or else clients of the RegisterCoalescer +// class may not depend on the RegisterCoalescer.o file in the current +// .a file, causing alias analysis support to not be included in the +// tool correctly! +// +RegisterCoalescer::~RegisterCoalescer() {} + +unsigned CoalescerPair::compose(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return tri_.composeSubRegIndices(a, b); +} + +bool CoalescerPair::isMoveInstr(const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else + return false; + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + srcReg_ = dstReg_ = subIdx_ = 0; + newRC_ = 0; + flipped_ = crossClass_ = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + partial_ = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + flipped_ = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = tri_.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // For now we only handle the case of identical indices in commensurate + // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg + // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. + if (SrcSub != DstSub) + return false; + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (!getCommonSubClass(DstRC, SrcRC)) + return false; + SrcSub = DstSub = 0; + } + + // There can be no SrcSub. + if (SrcSub) { + std::swap(Src, Dst); + DstSub = SrcSub; + SrcSub = 0; + assert(!flipped_ && "Unexpected flip"); + flipped_ = true; + } + + // Find the new register class. 
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (DstSub) + newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + else + newRC_ = getCommonSubClass(DstRC, SrcRC); + if (!newRC_) + return false; + crossClass_ = newRC_ != DstRC || newRC_ != SrcRC; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + srcReg_ = Src; + dstReg_ = Dst; + subIdx_ = DstSub; + return true; +} + +bool CoalescerPair::flip() { + if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_)) + return false; + std::swap(srcReg_, dstReg_); + flipped_ = !flipped_; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is srcReg_. + if (Dst == srcReg_) { + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + } else if (Src != srcReg_) { + return false; + } + + // Now check that Dst matches dstReg_. + if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) { + if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + assert(!subIdx_ && "Inconsistent CoalescerPair state."); + // DstSub could be set for a physreg from INSERT_SUBREG. + if (DstSub) + Dst = tri_.getSubReg(Dst, DstSub); + // Full copy of Src. + if (!SrcSub) + return dstReg_ == Dst; + // This is a partial register copy. Check that the parts match. + return tri_.getSubReg(dstReg_, SrcSub) == Dst; + } else { + // dstReg_ is virtual. + if (dstReg_ != Dst) + return false; + // Registers match, do the subregisters line up? + return compose(subIdx_, SrcSub) == DstSub; + } +} + +// Because of the way .a files work, we must force the SimpleRC +// implementation to be pulled in if the RegisterCoalescer classes are +// pulled in. Otherwise we run the risk of RegisterCoalescer being +// used, but the default implementation not being linked into the tool +// that uses it. +DEFINING_FILE_FOR(RegisterCoalescer) diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp new file mode 100644 index 0000000..a2580b8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -0,0 +1,384 @@ +//===-- RegisterScavenging.cpp - Machine register scavenging --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine register scavenger. It can provide +// information, such as unused registers, at any point in a machine basic block. +// It also provides a mechanism to make registers available by evicting them to +// spill slots. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reg-scavenging" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// setUsed - Set the register and its sub-registers as being used. +void RegScavenger::setUsed(unsigned Reg) { + RegsAvailable.reset(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + RegsAvailable.reset(SubReg); +} + +bool RegScavenger::isAliasUsed(unsigned Reg) const { + if (isUsed(Reg)) + return true; + for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + if (isUsed(*R)) + return true; + return false; +} + +void RegScavenger::initRegState() { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + + // All registers started out unused. + RegsAvailable.set(); + + // Reserved registers are always used. + RegsAvailable ^= ReservedRegs; + + if (!MBB) + return; + + // Live-in registers are in use. + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + // Pristine CSRs are also unavailable. + BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setUsed(I); +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { + MachineFunction &MF = *mbb->getParent(); + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + MRI = &MF.getRegInfo(); + + assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + "Target changed?"); + + // Self-initialize. + if (!MBB) { + NumPhysRegs = TRI->getNumRegs(); + RegsAvailable.resize(NumPhysRegs); + + // Create reserved registers bitvector. + ReservedRegs = TRI->getReservedRegs(MF); + + // Create callee-saved registers bitvector. + CalleeSavedRegs.resize(NumPhysRegs); + const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + if (CSRegs != NULL) + for (unsigned i = 0; CSRegs[i]; ++i) + CalleeSavedRegs.set(CSRegs[i]); + } + + MBB = mbb; + initRegState(); + + Tracking = false; +} + +void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + BV.set(*R); +} + +void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + BV.set(*R); +} + +void RegScavenger::forward() { + // Move ptr forward. 
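+  // (In other words: forward() is a cursor over the block. The first call
+  // positions it at MBB->begin(); each later call advances MBBI by one
+  // instruction and folds that instruction's kills and defs into the
+  // RegsAvailable set below.)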
+ if (!Tracking) { + MBBI = MBB->begin(); + Tracking = true; + } else { + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + MBBI = llvm::next(MBBI); + } + + MachineInstr *MI = MBBI; + + if (MI == ScavengeRestore) { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + } + + if (MI->isDebugValue()) + return; + + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + // FIXME: The scavenger is not predication aware. If the instruction is + // predicated, conservatively assume "kill" markers do not actually kill the + // register. Similarly ignores "dead" markers. + bool isPred = TII->isPredicated(MI); + BitVector EarlyClobberRegs(NumPhysRegs); + BitVector KillRegs(NumPhysRegs); + BitVector DefRegs(NumPhysRegs); + BitVector DeadRegs(NumPhysRegs); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || isReserved(Reg)) + continue; + + if (MO.isUse()) { + // Two-address operands implicitly kill. + if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) + addRegWithSubRegs(KillRegs, Reg); + } else { + assert(MO.isDef()); + if (!isPred && MO.isDead()) + addRegWithSubRegs(DeadRegs, Reg); + else + addRegWithSubRegs(DefRegs, Reg); + if (MO.isEarlyClobber()) + addRegWithAliases(EarlyClobberRegs, Reg); + } + } + + // Verify uses and defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || isReserved(Reg)) + continue; + if (MO.isUse()) { + if (!isUsed(Reg)) { + // Check if it's partial live: e.g. + // D0 = insert_subreg D0<undef>, S0 + // ... D0 + // The problem is the insert_subreg could be eliminated. The use of + // D0 is using a partially undef value. This is not *incorrect* since + // S1 is can be freely clobbered. + // Ideally we would like a way to model this, but leaving the + // insert_subreg around causes both correctness and performance issues. + bool SubUsed = false; + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + if (isUsed(SubReg)) { + SubUsed = true; + break; + } + assert(SubUsed && "Using an undefined register!"); + } + assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && + "Using an early clobbered register!"); + } else { + assert(MO.isDef()); +#if 0 + // FIXME: Enable this once we've figured out how to correctly transfer + // implicit kills during codegen passes like the coalescer. + assert((KillRegs.test(Reg) || isUnused(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); +#endif + } + } + + // Commit the changes. + setUnused(KillRegs); + setUnused(DeadRegs); + setUsed(DefRegs); +} + +void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { + if (includeReserved) + used = ~RegsAvailable; + else + used = ~RegsAvailable & ~ReservedRegs; +} + +unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) { + DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << + "\n"); + return *I; + } + return 0; +} + +/// getRegsAvailable - Return all available registers in the register class +/// in Mask. 
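+/// A caller might use it like this (illustrative sketch; RS is a
+/// RegScavenger and RC the caller's register class):
+///   BitVector Avail(TRI->getNumRegs());
+///   RS.getRegsAvailable(RC, Avail);
+///   for (int R = Avail.find_first(); R != -1; R = Avail.find_next(R))
+///     ; // R (and all of R's aliases) are unused at the current point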
+void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
+                                    BitVector &Mask) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I)
+    if (!isAliasUsed(*I))
+      Mask.set(*I);
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after StartMI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+                                       BitVector &Candidates,
+                                       unsigned InstrLimit,
+                                       MachineBasicBlock::iterator &UseMI) {
+  int Survivor = Candidates.find_first();
+  assert(Survivor > 0 && "No candidates for scavenging");
+
+  MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+  assert(StartMI != ME && "MI already at terminator");
+  MachineBasicBlock::iterator RestorePointMI = StartMI;
+  MachineBasicBlock::iterator MI = StartMI;
+
+  bool inVirtLiveRange = false;
+  for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+    if (MI->isDebugValue()) {
+      ++InstrLimit; // Don't count debug instructions
+      continue;
+    }
+    bool isVirtKillInsn = false;
+    bool isVirtDefInsn = false;
+    // Remove any candidates touched by instruction.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+        continue;
+      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+        if (MO.isDef())
+          isVirtDefInsn = true;
+        else if (MO.isKill())
+          isVirtKillInsn = true;
+        continue;
+      }
+      Candidates.reset(MO.getReg());
+      for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+        Candidates.reset(*R);
+    }
+    // If we're not in a virtual reg's live range, this is a valid
+    // restore point.
+    if (!inVirtLiveRange) RestorePointMI = MI;
+
+    // Update whether we're in the live range of a virtual register
+    if (isVirtKillInsn) inVirtLiveRange = false;
+    if (isVirtDefInsn) inVirtLiveRange = true;
+
+    // Was our survivor untouched by this instruction?
+    if (Candidates.test(Survivor))
+      continue;
+
+    // All candidates gone?
+    if (Candidates.none())
+      break;
+
+    Survivor = Candidates.find_first();
+  }
+  // If we ran off the end, that's where we want to restore.
+  if (MI == ME) RestorePointMI = ME;
+  assert(RestorePointMI != StartMI &&
+         "No available scavenger restore location!");
+
+  // We ran out of candidates, so stop the search.
+  UseMI = RestorePointMI;
+  return Survivor;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+                                        MachineBasicBlock::iterator I,
+                                        int SPAdj) {
+  // Consider all allocatable registers in the register class initially
+  BitVector Candidates =
+    TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+
+  // Exclude all the registers being used by the instruction.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = I->getOperand(i);
+    if (MO.isReg() && MO.getReg() != 0 &&
+        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      Candidates.reset(MO.getReg());
+  }
+
+  // Try to find a register that's unused if there is one, as then we won't
+  // have to spill.
+  if ((Candidates & RegsAvailable).any())
+    Candidates &= RegsAvailable;
+
+  // Find the register whose use is furthest away.
+  MachineBasicBlock::iterator UseMI;
+  unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+  // If we found an unused register there is no reason to spill it.
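+  // If SReg is still live here, the code below spills it around the use,
+  // giving (schematically, for a hypothetical target):
+  //   STORE SReg, <emergency-slot>   ; inserted before I
+  //   ...   SReg serves as the temporary   ...
+  //   LOAD  SReg, <emergency-slot>   ; inserted before UseMI
+  // unless the target overrides saveScavengerRegister() to do better.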
+  if (!isAliasUsed(SReg)) {
+    DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+    return SReg;
+  }
+
+  assert(ScavengedReg == 0 &&
+         "Scavenger slot is live, unable to scavenge another register!");
+
+  // Avoid infinite regress
+  ScavengedReg = SReg;
+
+  // If the target knows how to save/restore the register, let it do so;
+  // otherwise, use the emergency stack spill slot.
+  if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+    // Spill the scavenged register before I.
+    assert(ScavengingFrameIndex >= 0 &&
+           "Cannot scavenge register without an emergency spill slot!");
+    TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC, TRI);
+    MachineBasicBlock::iterator II = prior(I);
+    TRI->eliminateFrameIndex(II, SPAdj, this);
+
+    // Restore the scavenged register before its use (or first terminator).
+    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+    II = prior(UseMI);
+    TRI->eliminateFrameIndex(II, SPAdj, this);
+  }
+
+  ScavengeRestore = prior(UseMI);
+
+  // Doing this here leads to infinite regress.
+  // ScavengedReg = SReg;
+  ScavengedRC = RC;
+
+  DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+        "\n");
+
+  return SReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 0000000..93426ee
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1014 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+                "Render machine functions (and related info) to HTML pages",
+                false, false);
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+                 cl::desc("Appended to function name to get output file name "
+                          "(default: \".html\")"),
+                 cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+                     cl::desc("Comma-separated list of functions to render"
+                              ", or \"*\"."),
+                     cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+                cl::desc("Register classes to render pressure for."),
+                cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+              cl::desc("Live intervals to show alongside code."),
+              cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+            cl::desc("Don't display empty intervals."),
+            cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+                 cl::desc("Render indexes not associated with instructions or "
+                          "MBB starts."),
+                 cl::init(false), cl::Hidden);
+
+static
cl::opt<bool> +useFancyVerticals("rmf-fancy-verts", + cl::desc("Use SVG for vertical text."), + cl::init(true), cl::Hidden); + +static cl::opt<bool> +prettyHTML("rmf-pretty-html", + cl::desc("Pretty print HTML. For debugging the renderer only.."), + cl::init(false), cl::Hidden); + + +namespace llvm { + + bool MFRenderingOptions::renderingOptionsProcessed; + std::set<std::string> MFRenderingOptions::mfNamesToRender; + bool MFRenderingOptions::renderAllMFs = false; + + std::set<std::string> MFRenderingOptions::classNamesToRender; + bool MFRenderingOptions::renderAllClasses = false; + + std::set<std::pair<unsigned, unsigned> > + MFRenderingOptions::intervalNumsToRender; + unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly; + + template <typename OutputItr> + void MFRenderingOptions::splitComaSeperatedList(const std::string &s, + OutputItr outItr) { + std::string::const_iterator curPos = s.begin(); + std::string::const_iterator nextComa = std::find(curPos, s.end(), ','); + while (nextComa != s.end()) { + std::string elem; + std::copy(curPos, nextComa, std::back_inserter(elem)); + *outItr = elem; + ++outItr; + curPos = llvm::next(nextComa); + nextComa = std::find(curPos, s.end(), ','); + } + + if (curPos != s.end()) { + std::string elem; + std::copy(curPos, s.end(), std::back_inserter(elem)); + *outItr = elem; + ++outItr; + } + } + + void MFRenderingOptions::processOptions() { + if (!renderingOptionsProcessed) { + processFuncNames(); + processRegClassNames(); + processIntervalNumbers(); + renderingOptionsProcessed = true; + } + } + + void MFRenderingOptions::processFuncNames() { + if (machineFuncsToRender == "*") { + renderAllMFs = true; + } else { + splitComaSeperatedList(machineFuncsToRender, + std::inserter(mfNamesToRender, + mfNamesToRender.begin())); + } + } + + void MFRenderingOptions::processRegClassNames() { + if (pressureClasses == "*") { + renderAllClasses = true; + } else { + splitComaSeperatedList(pressureClasses, + std::inserter(classNamesToRender, + classNamesToRender.begin())); + } + } + + void MFRenderingOptions::processIntervalNumbers() { + std::set<std::string> intervalRanges; + splitComaSeperatedList(showIntervals, + std::inserter(intervalRanges, + intervalRanges.begin())); + std::for_each(intervalRanges.begin(), intervalRanges.end(), + processIntervalRange); + } + + void MFRenderingOptions::processIntervalRange( + const std::string &intervalRangeStr) { + if (intervalRangeStr == "*") { + intervalTypesToRender |= All; + } else if (intervalRangeStr == "virt-nospills*") { + intervalTypesToRender |= VirtNoSpills; + } else if (intervalRangeStr == "spills*") { + intervalTypesToRender |= VirtSpills; + } else if (intervalRangeStr == "virt*") { + intervalTypesToRender |= AllVirt; + } else if (intervalRangeStr == "phys*") { + intervalTypesToRender |= AllPhys; + } else { + std::istringstream iss(intervalRangeStr); + unsigned reg1, reg2; + if ((iss >> reg1 >> std::ws)) { + if (iss.eof()) { + intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1)); + } else { + char c; + iss >> c; + if (c == '-' && (iss >> reg2)) { + intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1)); + } else { + dbgs() << "Warning: Invalid interval range \"" + << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n"; + } + } + } else { + dbgs() << "Warning: Invalid interval number \"" + << intervalRangeStr << "\" in -rmf-intervals. 
Skipping.\n"; + } + } + } + + void MFRenderingOptions::setup(MachineFunction *mf, + const TargetRegisterInfo *tri, + LiveIntervals *lis, + const RenderMachineFunction *rmf) { + this->mf = mf; + this->tri = tri; + this->lis = lis; + this->rmf = rmf; + + clear(); + } + + void MFRenderingOptions::clear() { + regClassesTranslatedToCurrentFunction = false; + regClassSet.clear(); + + intervalsTranslatedToCurrentFunction = false; + intervalSet.clear(); + } + + void MFRenderingOptions::resetRenderSpecificOptions() { + intervalSet.clear(); + intervalsTranslatedToCurrentFunction = false; + } + + bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const { + processOptions(); + + return (renderAllMFs || + mfNamesToRender.find(mf->getFunction()->getName()) != + mfNamesToRender.end()); + } + + const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{ + translateRegClassNamesToCurrentFunction(); + return regClassSet; + } + + const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const { + translateIntervalNumbersToCurrentFunction(); + return intervalSet; + } + + bool MFRenderingOptions::renderEmptyIndexes() const { + return showEmptyIndexes; + } + + bool MFRenderingOptions::fancyVerticals() const { + return useFancyVerticals; + } + + void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const { + if (!regClassesTranslatedToCurrentFunction) { + processOptions(); + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + if (renderAllClasses || + classNamesToRender.find(trc->getName()) != + classNamesToRender.end()) { + regClassSet.insert(trc); + } + } + regClassesTranslatedToCurrentFunction = true; + } + } + + void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const { + if (!intervalsTranslatedToCurrentFunction) { + processOptions(); + + // If we're not just doing explicit then do a copy over all matching + // types. + if (intervalTypesToRender != ExplicitOnly) { + for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (filterEmpty && li->empty()) + continue; + + if ((TargetRegisterInfo::isPhysicalRegister(li->reg) && + (intervalTypesToRender & AllPhys))) { + intervalSet.insert(li); + } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) { + if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) || + ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) { + intervalSet.insert(li); + } + } + } + } + + // If we need to process the explicit list... 
+ if (intervalTypesToRender != All) { + for (std::set<std::pair<unsigned, unsigned> >::const_iterator + regRangeItr = intervalNumsToRender.begin(), + regRangeEnd = intervalNumsToRender.end(); + regRangeItr != regRangeEnd; ++regRangeItr) { + const std::pair<unsigned, unsigned> &range = *regRangeItr; + for (unsigned reg = range.first; reg != range.second; ++reg) { + if (lis->hasInterval(reg)) { + intervalSet.insert(&lis->getInterval(reg)); + } + } + } + } + + intervalsTranslatedToCurrentFunction = true; + } + } + + // ---------- TargetRegisterExtraInformation implementation ---------- + + TargetRegisterExtraInfo::TargetRegisterExtraInfo() + : mapsPopulated(false) { + } + + void TargetRegisterExtraInfo::setup(MachineFunction *mf, + MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, + LiveIntervals *lis) { + this->mf = mf; + this->mri = mri; + this->tri = tri; + this->lis = lis; + } + + void TargetRegisterExtraInfo::reset() { + if (!mapsPopulated) { + initWorst(); + //initBounds(); + initCapacity(); + mapsPopulated = true; + } + + resetPressureAndLiveStates(); + } + + void TargetRegisterExtraInfo::clear() { + prWorst.clear(); + vrWorst.clear(); + capacityMap.clear(); + pressureMap.clear(); + //liveStatesMap.clear(); + mapsPopulated = false; + } + + void TargetRegisterExtraInfo::initWorst() { + assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && + "Worst map already initialised?"); + + // Start with the physical registers. + for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { + WorstMapLine &pregLine = prWorst[preg]; + + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + + unsigned numOverlaps = 0; + for (TargetRegisterClass::iterator rItr = trc->begin(), + rEnd = trc->end(); + rItr != rEnd; ++rItr) { + unsigned trcPReg = *rItr; + if (tri->regsOverlap(preg, trcPReg)) + ++numOverlaps; + } + + pregLine[trc] = numOverlaps; + } + } + + // Now the register classes. 
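+    // Worked example on a hypothetical register file: if class A = {R0, R1}
+    // and class B = {R0R1}, where R0R1 overlaps both R0 and R1, the loops
+    // below compute vrWorst[A][B] = 1 (each register of A overlaps at most
+    // one register of B) and vrWorst[B][A] = 2 (R0R1 overlaps two registers
+    // of A).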
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rc1Itr != rcEnd; ++rc1Itr) { + const TargetRegisterClass *trc1 = *rc1Itr; + WorstMapLine &classLine = vrWorst[trc1]; + + for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin(); + rc2Itr != rcEnd; ++rc2Itr) { + const TargetRegisterClass *trc2 = *rc2Itr; + + unsigned worst = 0; + + for (TargetRegisterClass::iterator trc1Itr = trc1->begin(), + trc1End = trc1->end(); + trc1Itr != trc1End; ++trc1Itr) { + unsigned trc1Reg = *trc1Itr; + unsigned trc1RegWorst = 0; + + for (TargetRegisterClass::iterator trc2Itr = trc2->begin(), + trc2End = trc2->end(); + trc2Itr != trc2End; ++trc2Itr) { + unsigned trc2Reg = *trc2Itr; + if (tri->regsOverlap(trc1Reg, trc2Reg)) + ++trc1RegWorst; + } + if (trc1RegWorst > worst) { + worst = trc1RegWorst; + } + } + + if (worst != 0) { + classLine[trc2] = worst; + } + } + } + } + + unsigned TargetRegisterExtraInfo::getWorst( + unsigned reg, + const TargetRegisterClass *trc) const { + const WorstMapLine *wml = 0; + if (TargetRegisterInfo::isPhysicalRegister(reg)) { + PRWorstMap::const_iterator prwItr = prWorst.find(reg); + assert(prwItr != prWorst.end() && "Missing prWorst entry."); + wml = &prwItr->second; + } else { + const TargetRegisterClass *regTRC = mri->getRegClass(reg); + VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC); + assert(vrwItr != vrWorst.end() && "Missing vrWorst entry."); + wml = &vrwItr->second; + } + + WorstMapLine::const_iterator wmlItr = wml->find(trc); + if (wmlItr == wml->end()) + return 0; + + return wmlItr->second; + } + + void TargetRegisterExtraInfo::initCapacity() { + assert(!mapsPopulated && capacityMap.empty() && + "Capacity map already initialised?"); + + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + unsigned capacity = std::distance(trc->allocation_order_begin(*mf), + trc->allocation_order_end(*mf)); + + if (capacity != 0) + capacityMap[trc] = capacity; + } + } + + unsigned TargetRegisterExtraInfo::getCapacity( + const TargetRegisterClass *trc) const { + CapacityMap::const_iterator cmItr = capacityMap.find(trc); + assert(cmItr != capacityMap.end() && + "vreg with unallocable register class"); + return cmItr->second; + } + + void TargetRegisterExtraInfo::resetPressureAndLiveStates() { + pressureMap.clear(); + //liveStatesMap.clear(); + + // Iterate over all slots. + + + // Iterate over all live intervals. + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + const TargetRegisterClass *liTRC; + + if (TargetRegisterInfo::isPhysicalRegister(li->reg)) + continue; + + liTRC = mri->getRegClass(li->reg); + + + // For all ranges in the current interal. + for (LiveInterval::iterator lrItr = li->begin(), + lrEnd = li->end(); + lrItr != lrEnd; ++lrItr) { + LiveRange *lr = &*lrItr; + + // For all slots in the current range. + for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) { + + // Record increased pressure at index for all overlapping classes. 
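+          // Each live virtual register contributes getWorst(reg, trc)
+          // units of pressure to every allocatable class trc at every slot
+          // covered by one of its ranges.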
+          for (TargetRegisterInfo::regclass_iterator
+                 rcItr = tri->regclass_begin(),
+                 rcEnd = tri->regclass_end();
+               rcItr != rcEnd; ++rcItr) {
+            const TargetRegisterClass *trc = *rcItr;
+
+            if (trc->allocation_order_begin(*mf) ==
+                trc->allocation_order_end(*mf))
+              continue;
+
+            unsigned worstAtI = getWorst(li->reg, trc);
+
+            if (worstAtI != 0) {
+              pressureMap[i][trc] += worstAtI;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+                                         const TargetRegisterClass *trc,
+                                         SlotIndex i) const {
+    PressureMap::const_iterator pmItr = pressureMap.find(i);
+    if (pmItr == pressureMap.end())
+      return 0;
+    const PressureMapLine &pmLine = pmItr->second;
+    PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+    if (pmlItr == pmLine.end())
+      return 0;
+    return pmlItr->second;
+  }
+
+  bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+                                   const TargetRegisterClass *trc,
+                                   SlotIndex i) const {
+    return (getPressureAtSlot(trc, i) > getCapacity(trc));
+  }
+
+  // ---------- MachineFunctionRenderer implementation ----------
+
+  void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+    if (!prettyHTML)
+      return;
+    for (unsigned i = 0; i < ns; ++i) {
+      os << " ";
+    }
+  }
+
+  RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+    return Spacer(ns);
+  }
+
+  raw_ostream& operator<<(raw_ostream &os,
+                          const RenderMachineFunction::Spacer &s) {
+    s.print(os);
+    return os;
+  }
+
+  template <typename Iterator>
+  std::string RenderMachineFunction::escapeChars(Iterator sBegin,
+                                                 Iterator sEnd) const {
+    std::string r;
+
+    for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+      char c = *sItr;
+
+      switch (c) {
+        case '<': r.append("&lt;"); break;
+        case '>': r.append("&gt;"); break;
+        case '&': r.append("&amp;"); break;
+        case ' ': r.append("&nbsp;"); break;
+        case '\"': r.append("&quot;"); break;
+        default: r.push_back(c); break;
+      }
+    }
+
+    return r;
+  }
+
+  RenderMachineFunction::LiveState
+  RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+                                        SlotIndex i) const {
+    const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+    // For uses/defs recorded use/def indexes override current liveness and
+    // instruction operands (Only for the interval which records the indexes).
+    if (i.isUse() || i.isDef()) {
+      UseDefs::const_iterator udItr = useDefs.find(li);
+      if (udItr != useDefs.end()) {
+        const SlotSet &slotSet = udItr->second;
+        if (slotSet.count(i)) {
+          if (i.isUse()) {
+            return Used;
+          }
+          // else
+          return Defined;
+        }
+      }
+    }
+
+    // If the slot is a load/store, or there's no info in the use/def set then
+    // use liveness and instruction operand info.
+    if (li->liveAt(i)) {
+
+      if (mi == 0) {
+        if (vrm == 0 ||
+            (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+          return AliveReg;
+        } else {
+          return AliveStack;
+        }
+      } else {
+        if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+          return Defined;
+        } else if (i.isUse() && mi->readsRegister(li->reg)) {
+          return Used;
+        } else {
+          if (vrm == 0 ||
+              (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+            return AliveReg;
+          } else {
+            return AliveStack;
+          }
+        }
+      }
+    }
+    return Dead;
+  }
+
+  RenderMachineFunction::PressureState
+  RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+                                            SlotIndex i) const {
+    if (trei.getPressureAtSlot(trc, i) == 0) {
+      return Zero;
+    } else if (trei.classOverCapacityAtSlot(trc, i)) {
+      return High;
+    }
+    return Low;
+  }
+
+  /// \brief Render a machine instruction.
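+  /// The instruction is printed into a string and passed through
+  /// escapeChars(), so characters such as '<' and '&' in the operand
+  /// syntax cannot break the surrounding HTML markup.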
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const { + std::string s; + raw_string_ostream oss(s); + oss << *mi; + + os << escapeChars(oss.str()); + } + + template <typename T> + void RenderMachineFunction::renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const { + if (ro.fancyVerticals()) { + os << indent << "<object\n" + << indent + s(2) << "class=\"obj\"\n" + << indent + s(2) << "type=\"image/svg+xml\"\n" + << indent + s(2) << "width=\"14px\"\n" + << indent + s(2) << "height=\"55px\"\n" + << indent + s(2) << "data=\"data:image/svg+xml,\n" + << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n" + << indent + s(6) << "<text x='-55' y='10' " + "font-family='Courier' font-size='12' " + "transform='rotate(-90)' " + "text-rendering='optimizeSpeed' " + "fill='#000'>" << t << "</text>\n" + << indent + s(4) << "</svg>\">\n" + << indent << "</object>\n"; + } else { + std::ostringstream oss; + oss << t; + std::string tStr(oss.str()); + + os << indent; + for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); + tStrItr != tStrEnd; ++tStrItr) { + os << *tStrItr << "<br/>"; + } + os << "\n"; + } + } + + void RenderMachineFunction::insertCSS(const Spacer &indent, + raw_ostream &os) const { + os << indent << "<style type=\"text/css\">\n" + << indent + s(2) << "body { font-color: black; }\n" + << indent + s(2) << "table.code td { font-family: monospace; " + "border-width: 0px; border-style: solid; " + "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n" + << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n" + << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n" + << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n" + << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n" + << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n" + << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n" + << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n" + << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n" + << indent + s(2) << "table.code th { border-width: 0px; " + "border-style: solid; }\n" + << indent << "</style>\n"; + } + + void RenderMachineFunction::renderFunctionSummary( + const Spacer &indent, raw_ostream &os, + const char * const renderContextStr) const { + os << indent << "<h1>Function: " << mf->getFunction()->getName() + << "</h1>\n" + << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n"; + } + + + void RenderMachineFunction::renderPressureTableLegend( + const Spacer &indent, + raw_ostream &os) const { + os << indent << "<h2>Rendering Pressure Legend:</h2>\n" + << indent << "<table class=\"code\">\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<th>Pressure</th><th>Description</th>" + "<th>Appearance</th>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>No Pressure</td>" + "<td>No physical registers of this class requested.</td>" + "<td class=\"p-z\"> </td>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>Low Pressure</td>" + "<td>Sufficient physical registers to meet demand.</td>" + "<td class=\"p-l\"> </td>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>High Pressure</td>" + "<td>Potentially insufficient physical registers to meet demand.</td>" + "<td class=\"p-h\"> </td>\n" + << indent + 
s(2) << "</tr>\n" + << indent << "</table>\n"; + } + + template <typename CellType> + void RenderMachineFunction::renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair<CellType, unsigned> &rleAccumulator, + const std::map<CellType, std::string> &cellTypeStrs) const { + + if (rleAccumulator.second == 0) + return; + + typename std::map<CellType, std::string>::const_iterator ctsItr = + cellTypeStrs.find(rleAccumulator.first); + + assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); + + os << indent + s(4) << "<td class=\"" << ctsItr->second << "\""; + if (rleAccumulator.second > 1) + os << " colspan=" << rleAccumulator.second; + os << "></td>\n"; + } + + + void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const { + + std::map<LiveState, std::string> lsStrs; + lsStrs[Dead] = "l-n"; + lsStrs[Defined] = "l-d"; + lsStrs[Used] = "l-u"; + lsStrs[AliveReg] = "l-r"; + lsStrs[AliveStack] = "l-s"; + + std::map<PressureState, std::string> psStrs; + psStrs[Zero] = "p-z"; + psStrs[Low] = "p-l"; + psStrs[High] = "p-h"; + + // Open the table... + + os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n" + << indent + s(2) << "<tr>\n"; + + // Render the header row... + + os << indent + s(4) << "<th>index</th>\n" + << indent + s(4) << "<th>instr</th>\n"; + + // Render class names if necessary... + if (!ro.regClasses().empty()) { + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + os << indent + s(4) << "<th>\n"; + renderVertical(indent + s(6), os, trc->getName()); + os << indent + s(4) << "</th>\n"; + } + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? + if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "<th> </th>\n"; + + // Render interval numbers if necessary... + if (!ro.intervals().empty()) { + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = *liItr; + os << indent + s(4) << "<th>\n"; + renderVertical(indent + s(6), os, li->reg); + os << indent + s(4) << "</th>\n"; + } + } + + os << indent + s(2) << "</tr>\n"; + + // End header row, start with the data rows... + + MachineInstr *mi = 0; + + // Data rows: + for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); + i = i.getNextSlot()) { + + // Render the slot column. + os << indent + s(2) << "<tr height=6ex>\n"; + + // Render the code column. + if (i.isLoad()) { + MachineBasicBlock *mbb = sis->getMBBFromIndex(i); + mi = sis->getInstructionFromIndex(i); + + if (i == sis->getMBBStartIdx(mbb) || mi != 0 || + ro.renderEmptyIndexes()) { + os << indent + s(4) << "<td rowspan=4>" << i << " </td>\n" + << indent + s(4) << "<td rowspan=4>\n"; + + if (i == sis->getMBBStartIdx(mbb)) { + os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; + } else if (mi != 0) { + os << indent + s(6) << " "; + renderMachineInstr(os, mi); + } else { + // Empty interval - leave blank. + } + os << indent + s(4) << "</td>\n"; + } else { + i = i.getStoreIndex(); // <- Will be incremented to the next index. + continue; + } + } + + // Render the class columns. 
+ if (!ro.regClasses().empty()) { + std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0); + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + PressureState newPressure = getPressureStateAt(trc, i); + + if (newPressure == psRLEAccumulator.first) { + ++psRLEAccumulator.second; + } else { + renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); + psRLEAccumulator.first = newPressure; + psRLEAccumulator.second = 1; + } + } + renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? + if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "<td width=2em></td>\n"; + + if (!ro.intervals().empty()) { + std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0); + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + const LiveInterval *li = *liItr; + LiveState newLiveness = getLiveStateAt(li, i); + + if (newLiveness == lsRLEAccumulator.first) { + ++lsRLEAccumulator.second; + } else { + renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); + lsRLEAccumulator.first = newLiveness; + lsRLEAccumulator.second = 1; + } + } + renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); + } + os << indent + s(2) << "</tr>\n"; + } + + os << indent << "</table>\n"; + + if (!ro.regClasses().empty()) + renderPressureTableLegend(indent, os); + } + + void RenderMachineFunction::renderFunctionPage( + raw_ostream &os, + const char * const renderContextStr) const { + os << "<html>\n" + << s(2) << "<head>\n" + << s(4) << "<title>" << fqn << "</title>\n"; + + insertCSS(s(4), os); + + os << s(2) << "<head>\n" + << s(2) << "<body >\n"; + + renderFunctionSummary(s(4), os, renderContextStr); + + os << s(4) << "<br/><br/><br/>\n"; + + //renderLiveIntervalInfoTable(" ", os); + + os << s(4) << "<br/><br/><br/>\n"; + + renderCodeTablePlusPI(s(4), os); + + os << s(2) << "</body>\n" + << "</html>\n"; + } + + void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tri = mf->getTarget().getRegisterInfo(); + lis = &getAnalysis<LiveIntervals>(); + sis = &getAnalysis<SlotIndexes>(); + + trei.setup(mf, mri, tri, lis); + ro.setup(mf, tri, lis, this); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." 
+ + mf->getFunction()->getName().str(); + + return false; + } + + void RenderMachineFunction::releaseMemory() { + trei.clear(); + ro.clear(); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + } + + void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + const MachineInstr *mi = &*rItr; + if (mi->readsRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + } + if (mi->definesRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + } + } + } + + void RenderMachineFunction::rememberSpills( + const LiveInterval *li, + const std::vector<LiveInterval*> &spills) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(), + siEnd = spills.end(); + siItr != siEnd; ++siItr) { + const LiveInterval *spill = *siItr; + spillIntervals[li].insert(spill); + spillFor[spill] = li; + } + } + + bool RenderMachineFunction::isSpill(const LiveInterval *li) const { + SpillForMap::const_iterator sfItr = spillFor.find(li); + if (sfItr == spillFor.end()) + return false; + return true; + } + + void RenderMachineFunction::renderMachineFunction( + const char *renderContextStr, + const VirtRegMap *vrm, + const char *renderSuffix) { + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + this->vrm = vrm; + trei.reset(); + + std::string rpFileName(mf->getFunction()->getName().str() + + (renderSuffix ? renderSuffix : "") + + outputFileSuffix); + + std::string errMsg; + raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); + + renderFunctionPage(outFile, renderContextStr); + + ro.resetRenderSpecificOptions(); + } + + std::string RenderMachineFunction::escapeChars(const std::string &s) const { + return escapeChars(s.begin(), s.end()); + } + +} diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.h b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h new file mode 100644 index 0000000..8d56a82 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h @@ -0,0 +1,336 @@ +//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H +#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <algorithm> +#include <map> +#include <set> +#include <string> + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + class MachineInstr; + class MachineRegisterInfo; + class RenderMachineFunction; + class TargetRegisterClass; + class TargetRegisterInfo; + class VirtRegMap; + class raw_ostream; + + /// \brief Helper class to process rendering options. Tries to be as lazy as + /// possible. 
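+  /// Command-line values are parsed once per run, but are only translated
+  /// into per-function sets of register classes and intervals on first use
+  /// (see regClasses() and intervals() below).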
+ class MFRenderingOptions { + public: + + struct RegClassComp { + bool operator()(const TargetRegisterClass *trc1, + const TargetRegisterClass *trc2) const { + std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); + return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), + trc2Name.begin(), trc2Name.end()); + } + }; + + typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet; + + struct IntervalComp { + bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { + return li1->reg < li2->reg; + } + }; + + typedef std::set<const LiveInterval*, IntervalComp> IntervalSet; + + /// Initialise the rendering options. + void setup(MachineFunction *mf, const TargetRegisterInfo *tri, + LiveIntervals *lis, const RenderMachineFunction *rmf); + + /// Clear translations of options to the current function. + void clear(); + + /// Reset any options computed for this specific rendering. + void resetRenderSpecificOptions(); + + /// Should we render the current function. + bool shouldRenderCurrentMachineFunction() const; + + /// Return the set of register classes to render pressure for. + const RegClassSet& regClasses() const; + + /// Return the set of live intervals to render liveness for. + const IntervalSet& intervals() const; + + /// Render indexes which are not associated with instructions / MBB starts. + bool renderEmptyIndexes() const; + + /// Return whether or not to render using SVG for fancy vertical text. + bool fancyVerticals() const; + + private: + + static bool renderingOptionsProcessed; + static std::set<std::string> mfNamesToRender; + static bool renderAllMFs; + + static std::set<std::string> classNamesToRender; + static bool renderAllClasses; + + + static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender; + typedef enum { ExplicitOnly = 0, + AllPhys = 1, + VirtNoSpills = 2, + VirtSpills = 4, + AllVirt = 6, + All = 7 } + IntervalTypesToRender; + static unsigned intervalTypesToRender; + + template <typename OutputItr> + static void splitComaSeperatedList(const std::string &s, OutputItr outItr); + + static void processOptions(); + + static void processFuncNames(); + static void processRegClassNames(); + static void processIntervalNumbers(); + + static void processIntervalRange(const std::string &intervalRangeStr); + + MachineFunction *mf; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + const RenderMachineFunction *rmf; + + mutable bool regClassesTranslatedToCurrentFunction; + mutable RegClassSet regClassSet; + + mutable bool intervalsTranslatedToCurrentFunction; + mutable IntervalSet intervalSet; + + void translateRegClassNamesToCurrentFunction() const; + + void translateIntervalNumbersToCurrentFunction() const; + }; + + /// \brief Provide extra information about the physical and virtual registers + /// in the function being compiled. + class TargetRegisterExtraInfo { + public: + TargetRegisterExtraInfo(); + + /// \brief Set up TargetRegisterExtraInfo with pointers to necessary + /// sources of information. + void setup(MachineFunction *mf, MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, LiveIntervals *lis); + + /// \brief Recompute tables for changed function. + void reset(); + + /// \brief Free all tables in TargetRegisterExtraInfo. + void clear(); + + /// \brief Maximum number of registers from trc which alias reg. + unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; + + /// \brief Returns the number of allocable registers in trc. 
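+    /// (That is, the length of trc's allocation order for the current
+    /// function; classes with an empty allocation order get no capacity
+    /// entry.)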
+ unsigned getCapacity(const TargetRegisterClass *trc) const; + + /// \brief Return the number of registers of class trc that may be + /// needed at slot i. + unsigned getPressureAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + /// \brief Return true if the number of registers of type trc that may be + /// needed at slot i is greater than the capacity of trc. + bool classOverCapacityAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + private: + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + + typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine; + typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap; + VRWorstMap vrWorst; + + typedef std::map<unsigned, WorstMapLine> PRWorstMap; + PRWorstMap prWorst; + + typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap; + CapacityMap capacityMap; + + typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine; + typedef std::map<SlotIndex, PressureMapLine> PressureMap; + PressureMap pressureMap; + + bool mapsPopulated; + + /// \brief Initialise the 'worst' table. + void initWorst(); + + /// \brief Initialise the 'capacity' table. + void initCapacity(); + + /// \brief Initialise/Reset the 'pressure' and live states tables. + void resetPressureAndLiveStates(); + }; + + /// \brief Render MachineFunction objects and related information to a HTML + /// page. + class RenderMachineFunction : public MachineFunctionPass { + public: + static char ID; + + RenderMachineFunction() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + void rememberUseDefs(const LiveInterval *li); + + void rememberSpills(const LiveInterval *li, + const std::vector<LiveInterval*> &spills); + + bool isSpill(const LiveInterval *li) const; + + /// \brief Render this machine function to HTML. + /// + /// @param renderContextStr This parameter will be included in the top of + /// the html file to explain where (in the + /// codegen pipeline) this function was rendered + /// from. Set it to something like + /// "Pre-register-allocation". + /// @param vrm If non-null the VRM will be queried to determine + /// whether a virtual register was allocated to a + /// physical register or spilled. + /// @param renderFilePrefix This string will be appended to the function + /// name (before the output file suffix) to enable + /// multiple renderings from the same function. + void renderMachineFunction(const char *renderContextStr, + const VirtRegMap *vrm = 0, + const char *renderSuffix = 0); + + private: + class Spacer; + friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); + + std::string fqn; + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + SlotIndexes *sis; + const VirtRegMap *vrm; + + TargetRegisterExtraInfo trei; + MFRenderingOptions ro; + + + + // Utilities. 
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; + LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; + + typedef enum { Zero, Low, High } PressureState; + PressureState getPressureStateAt(const TargetRegisterClass *trc, + SlotIndex i) const; + + typedef std::map<const LiveInterval*, std::set<const LiveInterval*> > + SpillIntervals; + SpillIntervals spillIntervals; + + typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap; + SpillForMap spillFor; + + typedef std::set<SlotIndex> SlotSet; + typedef std::map<const LiveInterval*, SlotSet> UseDefs; + UseDefs useDefs; + + // ---------- Rendering methods ---------- + + /// For inserting spaces when pretty printing. + class Spacer { + public: + explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} + Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } + void print(raw_ostream &os) const; + private: + unsigned ns; + }; + + Spacer s(unsigned ns) const; + + template <typename Iterator> + std::string escapeChars(Iterator sBegin, Iterator sEnd) const; + + /// \brief Render a machine instruction. + void renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const; + + /// \brief Render vertical text. + template <typename T> + void renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const; + + /// \brief Insert CSS layout info. + void insertCSS(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a brief summary of the function (including rendering + /// context). + void renderFunctionSummary(const Spacer &indent, + raw_ostream &os, + const char * const renderContextStr) const; + + /// \brief Render a legend for the pressure table. + void renderPressureTableLegend(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a consecutive set of HTML cells of the same class using + /// the colspan attribute for run-length encoding. + template <typename CellType> + void renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair<CellType, unsigned> &rleAccumulator, + const std::map<CellType, std::string> &cellTypeStrs) const; + + /// \brief Render code listing, potentially with register pressure + /// and live intervals shown alongside. + void renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render the HTML page representing the MachineFunction. + void renderFunctionPage(raw_ostream &os, + const char * const renderContextStr) const; + + std::string escapeChars(const std::string &s) const; + }; +} + +#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp new file mode 100644 index 0000000..7d39dc4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -0,0 +1,586 @@ +//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. 
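+//
+// A concrete scheduler derives from ScheduleDAG, implements Schedule(),
+// and is driven through Run(). An illustrative sketch (MySched is a
+// hypothetical name, not a class in this file):
+//
+//   class MySched : public ScheduleDAG {
+//     virtual void Schedule() { /* fill Sequence from SUnits */ }
+//   public:
+//     MySched(MachineFunction &MF) : ScheduleDAG(MF) {}
+//   };
+//   ...
+//   MySched S(MF);
+//   S.Run(BB, BB->end()); // resets state, calls Schedule(), dumps schedule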
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <climits> +using namespace llvm; + +ScheduleDAG::ScheduleDAG(MachineFunction &mf) + : TM(mf.getTarget()), + TII(TM.getInstrInfo()), + TRI(TM.getRegisterInfo()), + MF(mf), MRI(mf.getRegInfo()), + EntrySU(), ExitSU() { +} + +ScheduleDAG::~ScheduleDAG() {} + +/// dump - dump the schedule. +void ScheduleDAG::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + + +/// Run - perform scheduling. +/// +void ScheduleDAG::Run(MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos) { + BB = bb; + InsertPos = insertPos; + + SUnits.clear(); + Sequence.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); + + Schedule(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +/// addPred - This adds the specified edge as a pred of the current node if +/// not already. It also adds the current node as a successor of the +/// specified node. +void SUnit::addPred(const SDep &D) { + // If this node already has this depenence, don't add a redundant one. + for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) + if (*I == D) + return; + // Now add a corresponding succ to N. + SDep P = D; + P.setSUnit(this); + SUnit *N = D.getSUnit(); + // Update the bookkeeping. + if (D.getKind() == SDep::Data) { + assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); + assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); + ++NumPreds; + ++N->NumSuccs; + } + if (!N->isScheduled) { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); + ++NumPredsLeft; + } + if (!isScheduled) { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + ++N->NumSuccsLeft; + } + Preds.push_back(D); + N->Succs.push_back(P); + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } +} + +/// removePred - This removes the specified edge as a pred of the current +/// node if it exists. It also removes the current node as a successor of +/// the specified node. +void SUnit::removePred(const SDep &D) { + // Find the matching predecessor. + for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) + if (*I == D) { + bool FoundSucc = false; + // Find the corresponding successor in N. + SDep P = D; + P.setSUnit(this); + SUnit *N = D.getSUnit(); + for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(), + EE = N->Succs.end(); II != EE; ++II) + if (*II == P) { + FoundSucc = true; + N->Succs.erase(II); + break; + } + assert(FoundSucc && "Mismatching preds / succs lists!"); + Preds.erase(I); + // Update the bookkeeping. 
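+      // This mirrors the accounting in addPred(): data dependencies
+      // decrement NumPreds/NumSuccs, and the "left" counters shrink only
+      // when the node on the other end has not been scheduled yet.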
+ if (P.getKind() == SDep::Data) { + assert(NumPreds > 0 && "NumPreds will underflow!"); + assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); + --NumPreds; + --N->NumSuccs; + } + if (!N->isScheduled) { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); + --NumPredsLeft; + } + if (!isScheduled) { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); + --N->NumSuccsLeft; + } + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } + return; + } +} + +void SUnit::setDepthDirty() { + if (!isDepthCurrent) return; + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isDepthCurrent = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), + E = SU->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isDepthCurrent) + WorkList.push_back(SuccSU); + } + } while (!WorkList.empty()); +} + +void SUnit::setHeightDirty() { + if (!isHeightCurrent) return; + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isHeightCurrent = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isHeightCurrent) + WorkList.push_back(PredSU); + } + } while (!WorkList.empty()); +} + +/// setDepthToAtLeast - Update this node's successors to reflect the +/// fact that this node's depth just increased. +/// +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= getDepth()) + return; + setDepthDirty(); + Depth = NewDepth; + isDepthCurrent = true; +} + +/// setHeightToAtLeast - Update this node's predecessors to reflect the +/// fact that this node's height just increased. +/// +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= getHeight()) + return; + setHeightDirty(); + Height = NewHeight; + isHeightCurrent = true; +} + +/// ComputeDepth - Calculate the maximal path from the node to the exit. +/// +void SUnit::ComputeDepth() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxPredDepth = 0; + for (SUnit::const_pred_iterator I = Cur->Preds.begin(), + E = Cur->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isDepthCurrent) + MaxPredDepth = std::max(MaxPredDepth, + PredSU->Depth + I->getLatency()); + else { + Done = false; + WorkList.push_back(PredSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxPredDepth != Cur->Depth) { + Cur->setDepthDirty(); + Cur->Depth = MaxPredDepth; + } + Cur->isDepthCurrent = true; + } + } while (!WorkList.empty()); +} + +/// ComputeHeight - Calculate the maximal path from the node to the entry. +/// +void SUnit::ComputeHeight() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxSuccHeight = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(), + E = Cur->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isHeightCurrent) + MaxSuccHeight = std::max(MaxSuccHeight, + SuccSU->Height + I->getLatency()); + else { + Done = false; + WorkList.push_back(SuccSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxSuccHeight != Cur->Height) { + Cur->setHeightDirty(); + Cur->Height = MaxSuccHeight; + } + Cur->isHeightCurrent = true; + } + } while (!WorkList.empty()); +} + +/// SUnit - Scheduling unit. 
It's an wrapper around either a single SDNode or +/// a group of nodes flagged together. +void SUnit::dump(const ScheduleDAG *G) const { + dbgs() << "SU(" << NodeNum << "): "; + G->dumpNode(this); +} + +void SUnit::dumpAll(const ScheduleDAG *G) const { + dump(G); + + dbgs() << " # preds left : " << NumPredsLeft << "\n"; + dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " Latency : " << Latency << "\n"; + dbgs() << " Depth : " << Depth << "\n"; + dbgs() << " Height : " << Height << "\n"; + + if (Preds.size() != 0) { + dbgs() << " Predecessors:\n"; + for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + dbgs() << " "; + switch (I->getKind()) { + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; + } + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + if (I->isArtificial()) + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; + } + } + if (Succs.size() != 0) { + dbgs() << " Successors:\n"; + for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); + I != E; ++I) { + dbgs() << " "; + switch (I->getKind()) { + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; + } + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + if (I->isArtificial()) + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; + } + } + dbgs() << "\n"; +} + +#ifndef NDEBUG +/// VerifySchedule - Verify that all SUnits were scheduled and that +/// their state is consistent. +/// +void ScheduleDAG::VerifySchedule(bool isBottomUp) { + bool AnyNotSched = false; + unsigned DeadNodes = 0; + unsigned Noops = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].isScheduled && + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > + unsigned(INT_MAX)) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has an unexpected " + << (isBottomUp ? "Height" : "Depth") << " value!\n"; + AnyNotSched = true; + } + if (isBottomUp) { + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has successors left!\n"; + AnyNotSched = true; + } + } else { + if (SUnits[i].NumPredsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has predecessors left!\n"; + AnyNotSched = true; + } + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(!AnyNotSched); + assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif + +/// InitDAGTopologicalSorting - create the initial topological +/// ordering from the DAG to be scheduled. +/// +/// The idea of the algorithm is taken from +/// "Online algorithms for managing the topological order of +/// a directed acyclic graph" by David J. Pearce and Paul H.J. 
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+  unsigned DAGSize = SUnits.size();
+  std::vector<SUnit*> WorkList;
+  WorkList.reserve(DAGSize);
+
+  Index2Node.resize(DAGSize);
+  Node2Index.resize(DAGSize);
+
+  // Initialize the data structures.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    int NodeNum = SU->NodeNum;
+    unsigned Degree = SU->Succs.size();
+    // Temporarily use the Node2Index array as scratch space for degree counts.
+    Node2Index[NodeNum] = Degree;
+
+    // Is it a node without dependencies?
+    if (Degree == 0) {
+      assert(SU->Succs.empty() && "SUnit should have no successors");
+      // Collect leaf nodes.
+      WorkList.push_back(SU);
+    }
+  }
+
+  int Id = DAGSize;
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    Allocate(SU->NodeNum, --Id);
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      SUnit *PredSU = I->getSUnit();
+      if (!--Node2Index[PredSU->NodeNum])
+        // If all dependencies of the node are processed already,
+        // then the node can be computed now.
+        WorkList.push_back(PredSU);
+    }
+  }
+
+  Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+  // Check correctness of the ordering.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+             "Wrong topological sorting");
+    }
+  }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+  int UpperBound, LowerBound;
+  LowerBound = Node2Index[Y->NodeNum];
+  UpperBound = Node2Index[X->NodeNum];
+  bool HasLoop = false;
+  // Is Ord(X) < Ord(Y) ?
+  if (LowerBound < UpperBound) {
+    // Update the topological order.
+    Visited.reset();
+    DFS(Y, UpperBound, HasLoop);
+    assert(!HasLoop && "Inserted edge creates a loop!");
+    // Recompute topological indexes.
+    Shift(Visited, LowerBound, UpperBound);
+  }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an
+/// edge to be removed from the specified node N from the predecessors
+/// of the current node M.
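+/// Removing an edge only relaxes ordering constraints, so it can never make
+/// the existing order invalid or create a cycle; a conservative
+/// implementation could, for example, simply rebuild the order from scratch
+/// with InitDAGTopologicalSorting().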
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { + // InitDAGTopologicalSorting(); +} + +/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark +/// all nodes affected by the edge insertion. These nodes will later get new +/// topological indexes by means of the Shift method. +void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, + bool& HasLoop) { + std::vector<const SUnit*> WorkList; + WorkList.reserve(SUnits.size()); + + WorkList.push_back(SU); + do { + SU = WorkList.back(); + WorkList.pop_back(); + Visited.set(SU->NodeNum); + for (int I = SU->Succs.size()-1; I >= 0; --I) { + int s = SU->Succs[I].getSUnit()->NodeNum; + if (Node2Index[s] == UpperBound) { + HasLoop = true; + return; + } + // Visit successors if not already and in affected region. + if (!Visited.test(s) && Node2Index[s] < UpperBound) { + WorkList.push_back(SU->Succs[I].getSUnit()); + } + } + } while (!WorkList.empty()); +} + +/// Shift - Renumber the nodes so that the topological ordering is +/// preserved. +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, + int UpperBound) { + std::vector<int> L; + int shift = 0; + int i; + + for (i = LowerBound; i <= UpperBound; ++i) { + // w is node at topological index i. + int w = Index2Node[i]; + if (Visited.test(w)) { + // Unmark. + Visited.reset(w); + L.push_back(w); + shift = shift + 1; + } else { + Allocate(w, i - shift); + } + } + + for (unsigned j = 0; j < L.size(); ++j) { + Allocate(L[j], i - shift); + i = i + 1; + } +} + + +/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will +/// create a cycle. +bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + if (IsReachable(TargetSU, SU)) + return true; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isAssignedRegDep() && + IsReachable(TargetSU, I->getSUnit())) + return true; + return false; +} + +/// IsReachable - Checks if SU is reachable from TargetSU. +bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, + const SUnit *TargetSU) { + // If insertion of the edge SU->TargetSU would create a cycle + // then there is a path from TargetSU to SU. + int UpperBound, LowerBound; + LowerBound = Node2Index[TargetSU->NodeNum]; + UpperBound = Node2Index[SU->NodeNum]; + bool HasLoop = false; + // Is Ord(TargetSU) < Ord(SU) ? + if (LowerBound < UpperBound) { + Visited.reset(); + // There may be a path from TargetSU to SU. Check for it. + DFS(TargetSU, UpperBound, HasLoop); + } + return HasLoop; +} + +/// Allocate - assign the topological index to the node n. +void ScheduleDAGTopologicalSort::Allocate(int n, int index) { + Node2Index[n] = index; + Index2Node[index] = n; +} + +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {} + +ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp new file mode 100644 index 0000000..0a2fb37 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp @@ -0,0 +1,67 @@ +//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the ScheduleDAG class, which creates +// MachineInstrs according to the computed schedule. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +void ScheduleDAG::EmitNoop() { + TII->insertNoop(*BB, InsertPos); +} + +void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, + DenseMap<SUnit*, unsigned> &VRBaseMap) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp new file mode 100644 index 0000000..ea93dd5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -0,0 +1,620 @@ +//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAGInstrs class, which implements re-scheduling +// of MachineInstrs. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+                                     const MachineLoopInfo &mli,
+                                     const MachineDominatorTree &mdt)
+  : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
+    Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
+  MFI = mf.getFrameInfo();
+  DbgValueVec.clear();
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+                            MachineBasicBlock::iterator begin,
+                            MachineBasicBlock::iterator end,
+                            unsigned endcount) {
+  BB = bb;
+  Begin = begin;
+  InsertPosIndex = endcount;
+
+  ScheduleDAG::Run(bb, end);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+  do {
+    if (const Operator *U = dyn_cast<Operator>(V)) {
+      // If we find a ptrtoint, we can transfer control back to the
+      // regular getUnderlyingObjectFromInt.
+      if (U->getOpcode() == Instruction::PtrToInt)
+        return U->getOperand(0);
+      // If we find an add of a constant or a multiplied value, it's
+      // likely that the other operand will lead us to the base
+      // object. We don't have to worry about the case where the
+      // object address is somehow being computed by the multiply,
+      // because our callers only care when the result is an
+      // identifiable object.
+      if (U->getOpcode() != Instruction::Add ||
+          (!isa<ConstantInt>(U->getOperand(1)) &&
+           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+        return V;
+      V = U->getOperand(0);
+    } else {
+      return V;
+    }
+    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+  } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+  // First just call Value::getUnderlyingObject to let it do what it does.
+  do {
+    V = V->getUnderlyingObject();
+    // If it found an inttoptr, use special code to continue climbing.
+    if (Operator::getOpcode(V) != Instruction::IntToPtr)
+      break;
+    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+    // If that succeeded in finding a pointer, continue the search.
+    if (!O->getType()->isPointerTy())
+      break;
+    V = O;
+  } while (1);
+  return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
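+///
+/// For example, given IR along the lines of
+///
+///   %i = ptrtoint i8* %obj to i64
+///   %j = add i64 %i, 16
+///   %p = inttoptr i64 %j to i8*
+///   %v = load i8* %p
+///
+/// the load's address can be traced back to %obj, so accesses to distinct
+/// identified objects can be given independent chain dependencies.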
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
+                                                const MachineFrameInfo *MFI,
+                                                bool &MayAlias) {
+  MayAlias = true;
+  if (!MI->hasOneMemOperand() ||
+      !(*MI->memoperands_begin())->getValue() ||
+      (*MI->memoperands_begin())->isVolatile())
+    return 0;
+
+  const Value *V = (*MI->memoperands_begin())->getValue();
+  if (!V)
+    return 0;
+
+  V = getUnderlyingObject(V);
+  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+    // For now, ignore PseudoSourceValues which may alias LLVM IR values
+    // because the code that uses this function has no way to cope with
+    // such aliases.
+    if (PSV->isAliased(MFI))
+      return 0;
+
+    MayAlias = PSV->mayAlias(MFI);
+    return V;
+  }
+
+  if (isIdentifiedObject(V))
+    return V;
+
+  return 0;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+  if (MachineLoop *ML = MLI.getLoopFor(BB))
+    if (BB == ML->getLoopLatch()) {
+      MachineBasicBlock *Header = ML->getHeader();
+      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+           E = Header->livein_end(); I != E; ++I)
+        LoopLiveInRegs.insert(*I);
+      LoopRegs.VisitLoop(ML);
+    }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
+  // We'll be allocating one SUnit for each instruction, plus one for
+  // the region exit node.
+  SUnits.reserve(BB->size());
+
+  // We build scheduling units by walking a block's instruction list
+  // from bottom to top.
+
+  // Remember where a generic side-effecting instruction is as we proceed.
+  SUnit *BarrierChain = 0, *AliasChain = 0;
+
+  // Memory references to specific known memory locations are tracked
+  // so that they can be given more precise dependencies. We track
+  // separately the known memory locations that may alias and those
+  // that are known not to alias.
+  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+
+  // Keep track of dangling debug references to registers.
+  std::vector<std::pair<MachineInstr*, unsigned> >
+    DanglingDebugValue(TRI->getNumRegs(),
+                       std::make_pair(static_cast<MachineInstr*>(0), 0));
+
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  // Ask the target if address-backscheduling is desirable, and if so how much.
+  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+
+  // Remove any stale debug info; sometimes BuildSchedGraph is called again
+  // without emitting the info from the previous call.
+  DbgValueVec.clear();
+
+  // Walk the list of instructions, from bottom moving up.
+  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+       MII != MIE; --MII) {
+    MachineInstr *MI = prior(MII);
+    // DBG_VALUEs do not have SUnits built for them, so just remember these
+    // for later reinsertion.
+    if (MI->isDebugValue()) {
+      if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() &&
+          MI->getOperand(0).getReg())
+        DanglingDebugValue[MI->getOperand(0).getReg()] =
+          std::make_pair(MI, DbgValueVec.size());
+      DbgValueVec.push_back(MI);
+      continue;
+    }
+    const TargetInstrDesc &TID = MI->getDesc();
+    assert(!TID.isTerminator() && !MI->isLabel() &&
+           "Cannot schedule terminators or labels!");
+    // Create the SUnit for this MI.
+    SUnit *SU = NewSUnit(MI);
+
+    // Assign the Latency field of SU using target-provided information.
+    if (UnitLatencies)
+      SU->Latency = 1;
+    else
+      ComputeLatency(SU);
+
+    // Add register-based dependencies (data, anti, and output).
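+    // For example, given a sequence of pseudo-instructions such as
+    //   r1 = ADD r2, r3
+    //   r2 = SUB r4, r5
+    //   r6 = MUL r1, r2
+    // the MUL takes data dependencies on the ADD (via r1) and the SUB
+    // (via r2), the SUB takes an anti-dependence on the ADD's use of r2,
+    // and a later redefinition of r1 would take an output dependence on
+    // the ADD.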
+    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+      const MachineOperand &MO = MI->getOperand(j);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+
+      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+
+      if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
+        SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
+        DbgValueVec[DanglingDebugValue[Reg].second] = 0;
+        DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
+      }
+
+      std::vector<SUnit *> &UseList = Uses[Reg];
+      std::vector<SUnit *> &DefList = Defs[Reg];
+      // Optionally add output and anti dependencies. For anti
+      // dependencies we use a latency of 0 because for a multi-issue
+      // target we want to allow the defining instruction to issue
+      // in the same cycle as the using instruction.
+      // TODO: Using a latency of 1 here for output dependencies assumes
+      //       there's no cost for reusing registers.
+      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
+      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+        SUnit *DefSU = DefList[i];
+        if (DefSU != SU &&
+            (Kind != SDep::Output || !MO.isDead() ||
+             !DefSU->getInstr()->registerDefIsDead(Reg)))
+          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        std::vector<SUnit *> &DefList = Defs[*Alias];
+        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+          SUnit *DefSU = DefList[i];
+          if (DefSU != SU &&
+              (Kind != SDep::Output || !MO.isDead() ||
+               !DefSU->getInstr()->registerDefIsDead(*Alias)))
+            DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
+        }
+      }
+
+      if (MO.isDef()) {
+        // Add any data dependencies.
+        unsigned DataLatency = SU->Latency;
+        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+          SUnit *UseSU = UseList[i];
+          if (UseSU == SU)
+            continue;
+          unsigned LDataLatency = DataLatency;
+          // Optionally add in a special extra latency for nodes that
+          // feed addresses.
+          // TODO: Do this for register aliases too.
+          // TODO: Perhaps we should get rid of
+          //       SpecialAddressLatency and just move this into
+          //       adjustSchedDependency for the targets that care about it.
+          if (SpecialAddressLatency != 0 && !UnitLatencies) {
+            MachineInstr *UseMI = UseSU->getInstr();
+            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+            assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+            if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+              LDataLatency += SpecialAddressLatency;
+          }
+          // Adjust the dependence latency using operand def/use
+          // information (if any), and then allow the target to
+          // perform its own adjustments.
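+          // For example, on a hypothetical target where this def has a
+          // 3-cycle latency and SpecialAddressLatency is 2, a use that
+          // feeds a load's address would end up with a dependence latency
+          // of roughly 3 + 2 = 5 cycles before any target adjustment.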
+          const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
+          if (!UnitLatencies) {
+            ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+            ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+          }
+          UseSU->addPred(dep);
+        }
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          std::vector<SUnit *> &UseList = Uses[*Alias];
+          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+            SUnit *UseSU = UseList[i];
+            if (UseSU == SU)
+              continue;
+            const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
+            if (!UnitLatencies) {
+              ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+              ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+            }
+            UseSU->addPred(dep);
+          }
+        }
+
+        // If a def is going to wrap back around to the top of the loop,
+        // backschedule it.
+        if (!UnitLatencies && DefList.empty()) {
+          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+          if (I != LoopRegs.Deps.end()) {
+            const MachineOperand *UseMO = I->second.first;
+            unsigned Count = I->second.second;
+            const MachineInstr *UseMI = UseMO->getParent();
+            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            // TODO: If we knew the total depth of the region here, we could
+            // handle the case where the whole loop is inside the region but
+            // is large enough that the isScheduleHigh trick isn't needed.
+            if (UseMOIdx < UseTID.getNumOperands()) {
+              // Currently, we only support scheduling regions consisting of
+              // single basic blocks. Check to see if the instruction is in
+              // the same region by checking to see if it has the same parent.
+              if (UseMI->getParent() != MI->getParent()) {
+                unsigned Latency = SU->Latency;
+                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+                  Latency += SpecialAddressLatency;
+                // This is a wild guess as to the portion of the latency which
+                // will be overlapped by work done outside the current
+                // scheduling region.
+                Latency -= std::min(Latency, Count);
+                // Add the artificial edge.
+                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+                                    /*Reg=*/0, /*isNormalMemory=*/false,
+                                    /*isMustAlias=*/false,
+                                    /*isArtificial=*/true));
+              } else if (SpecialAddressLatency > 0 &&
+                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+                // The entire loop body is within the current scheduling region
+                // and the latency of this operation is assumed to be greater
+                // than the latency of the loop.
+                // TODO: Recursively mark data-edge predecessors as
+                //       isScheduleHigh too.
+                SU->isScheduleHigh = true;
+              }
+            }
+            LoopRegs.Deps.erase(I);
+          }
+        }
+
+        UseList.clear();
+        if (!MO.isDead())
+          DefList.clear();
+        DefList.push_back(SU);
+      } else {
+        UseList.push_back(SU);
+      }
+    }
+
+    // Add chain dependencies.
+    // Chain dependencies used to enforce memory order should have
+    // latency of 0 (except for true dependency of Store followed by
+    // aliased Load... we estimate that with a single cycle of latency
+    // assuming the hardware will bypass).
+    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+    // after stack slots are lowered to actual addresses.
+    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+    // produce more precise dependence information.
+#define STORE_LOAD_LATENCY 1
+    unsigned TrueMemOrderLatency = 0;
+    if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+        (MI->hasVolatileMemoryRef() &&
+         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+      // Be conservative with these and add dependencies on all memory
+      // references, even those that are known to not alias.
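+      // For example, a call, or any instruction with unmodeled side effects,
+      // acts as a full barrier here: every memory operation seen so far in
+      // the bottom-up walk (i.e. everything that follows it in program
+      // order) is made to depend on it, even accesses to provably distinct
+      // stack slots.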
+      for (std::map<const Value *, SUnit *>::iterator I =
+             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+      }
+      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+      }
+      NonAliasMemDefs.clear();
+      NonAliasMemUses.clear();
+      // Add SU to the barrier chain.
+      if (BarrierChain)
+        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+      BarrierChain = SU;
+
+      // fall-through
+    new_alias_chain:
+      // Chain all possibly aliasing memory references through SU.
+      if (AliasChain)
+        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+      AliasChain = SU;
+      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+           E = AliasMemDefs.end(); I != E; ++I) {
+        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+      }
+      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+      }
+      PendingLoads.clear();
+      AliasMemDefs.clear();
+      AliasMemUses.clear();
+    } else if (TID.mayStore()) {
+      bool MayAlias = true;
+      TrueMemOrderLatency = STORE_LOAD_LATENCY;
+      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+        // A store to a specific PseudoSourceValue. Add precise dependencies.
+        // Record the def in MemDefs, first adding a dep if there is
+        // an existing def.
+        std::map<const Value *, SUnit *>::iterator I =
+          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+        std::map<const Value *, SUnit *>::iterator IE =
+          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+        if (I != IE) {
+          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+                                  /*isNormalMemory=*/true));
+          I->second = SU;
+        } else {
+          if (MayAlias)
+            AliasMemDefs[V] = SU;
+          else
+            NonAliasMemDefs[V] = SU;
+        }
+        // Handle the uses in MemUses, if there are any.
+        std::map<const Value *, std::vector<SUnit *> >::iterator J =
+          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+        if (J != JE) {
+          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
+                                       /*Reg=*/0, /*isNormalMemory=*/true));
+          J->second.clear();
+        }
+        if (MayAlias) {
+          // Add dependencies from all the PendingLoads, i.e. loads
+          // with no underlying object.
+          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+          // Add dependence on alias chain, if needed.
+          if (AliasChain)
+            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+        }
+        // Add dependence on barrier chain, if needed.
+        if (BarrierChain)
+          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+      } else {
+        // Treat all other stores conservatively.
+        goto new_alias_chain;
+      }
+    } else if (TID.mayLoad()) {
+      bool MayAlias = true;
+      TrueMemOrderLatency = 0;
+      if (MI->isInvariantLoad(AA)) {
+        // Invariant load, no chain dependencies needed!
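+        // A typical example is a load from the constant pool or from an
+        // immutable fixed stack slot (e.g. a by-value argument): no store
+        // in the region can change its value, so it is free to move past
+        // any of them.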
+      } else {
+        if (const Value *V =
+            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+          // A load from a specific PseudoSourceValue. Add precise dependencies.
+          std::map<const Value *, SUnit *>::iterator I =
+            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+          std::map<const Value *, SUnit *>::iterator IE =
+            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+          if (I != IE)
+            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+                                    /*isNormalMemory=*/true));
+          if (MayAlias)
+            AliasMemUses[V].push_back(SU);
+          else
+            NonAliasMemUses[V].push_back(SU);
+        } else {
+          // A load with no underlying object. Depend on all
+          // potentially aliasing stores.
+          for (std::map<const Value *, SUnit *>::iterator I =
+                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+          PendingLoads.push_back(SU);
+          MayAlias = true;
+        }
+
+        // Add dependencies on alias and barrier chains, if needed.
+        if (MayAlias && AliasChain)
+          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+        if (BarrierChain)
+          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+      }
+    }
+  }
+
+  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+    Defs[i].clear();
+    Uses[i].clear();
+  }
+  PendingLoads.clear();
+}
+
+void ScheduleDAGInstrs::FinishBlock() {
+  // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+  // Compute the latency for the node.
+  SU->Latency =
+    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+
+  // Simplistic target-independent heuristic: assume that loads take
+  // extra time.
+  if (InstrItins.isEmpty())
+    if (SU->getInstr()->getDesc().mayLoad())
+      SU->Latency += 2;
+}
+
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                              SDep& dep) const {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty())
+    return;
+
+  // For a data dependency with a known register...
+  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+    return;
+
+  const unsigned Reg = dep.getReg();
+
+  // ... find the definition of the register in the defining
+  // instruction.
+  MachineInstr *DefMI = Def->getInstr();
+  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+  if (DefIdx != -1) {
+    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
+                                              DefIdx);
+    if (DefCycle >= 0) {
+      MachineInstr *UseMI = Use->getInstr();
+      const unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+      // For all uses of the register, calculate the maximum latency.
+      int Latency = -1;
+      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &MO = UseMI->getOperand(i);
+        if (!MO.isReg() || !MO.isUse())
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MOReg != Reg)
+          continue;
+
+        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
+        if (UseCycle >= 0)
+          Latency = std::max(Latency, DefCycle - UseCycle + 1);
+      }
+
+      // If we found a latency, then replace the existing dependence latency.
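+      // For example, if the itinerary says the def's result is available in
+      // cycle 5 (DefCycle = 5) and the use reads its operand in cycle 2
+      // (UseCycle = 2), the dependence latency becomes 5 - 2 + 1 = 4.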
+ if (Latency >= 0) + dep.setLatency(Latency); + } + } +} + +void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { + SU->getInstr()->dump(); +} + +std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream oss(s); + if (SU == &EntrySU) + oss << "<entry>"; + else if (SU == &ExitSU) + oss << "<exit>"; + else + SU->getInstr()->print(oss); + return oss.str(); +} + +// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { + // For MachineInstr-based scheduling, we're rescheduling the instructions in + // the block, so start by removing them from the block. + while (Begin != InsertPos) { + MachineBasicBlock::iterator I = Begin; + ++Begin; + BB->remove(I); + } + + // First reinsert any remaining debug_values; these are either constants, + // or refer to live-in registers. The beginning of the block is the right + // place for the latter. The former might reasonably be placed elsewhere + // using some kind of ordering algorithm, but right now it doesn't matter. + for (int i = DbgValueVec.size()-1; i>=0; --i) + if (DbgValueVec[i]) + BB->insert(InsertPos, DbgValueVec[i]); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + BB->insert(InsertPos, SU->getInstr()); + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) + BB->insert(InsertPos, SU->DbgInstrList[i]); + } + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (!DbgValueVec.empty()) { + for (int i = DbgValueVec.size()-1; i>=0; --i) + if (DbgValueVec[i]!=0) { + Begin = DbgValueVec[DbgValueVec.size()-1]; + break; + } + } else if (!Sequence.empty()) + Begin = Sequence[0]->getInstr(); + + DbgValueVec.clear(); + return BB; +} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h new file mode 100644 index 0000000..c8f543f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h @@ -0,0 +1,197 @@ +//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGInstrs class, which implements +// scheduling for a MachineInstr-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGINSTRS_H +#define SCHEDULEDAGINSTRS_H + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include <map> + +namespace llvm { + class MachineLoopInfo; + class MachineDominatorTree; + + /// LoopDependencies - This class analyzes loop-oriented register + /// dependencies, which are used to guide scheduling decisions. + /// For example, loop induction variable increments should be + /// scheduled as soon as possible after the variable's last use. 
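+  ///
+  /// For example, in a loop body such as
+  ///
+  ///     use r1
+  ///     ...
+  ///     r1 = increment r1
+  ///
+  /// scheduling the increment early (right after the last use of r1)
+  /// shortens the loop-carried dependence into the next iteration.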
+ /// + class LLVM_LIBRARY_VISIBILITY LoopDependencies { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + + public: + typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > + LoopDeps; + LoopDeps Deps; + + LoopDependencies(const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) : + MLI(mli), MDT(mdt) {} + + /// VisitLoop - Clear out any previous state and analyze the given loop. + /// + void VisitLoop(const MachineLoop *Loop) { + Deps.clear(); + MachineBasicBlock *Header = Loop->getHeader(); + SmallSet<unsigned, 8> LoopLiveIns; + for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), + LE = Header->livein_end(); LI != LE; ++LI) + LoopLiveIns.insert(*LI); + + const MachineDomTreeNode *Node = MDT.getNode(Header); + const MachineBasicBlock *MBB = Node->getBlock(); + assert(Loop->contains(MBB) && + "Loop does not contain header!"); + VisitRegion(Node, MBB, Loop, LoopLiveIns); + } + + private: + void VisitRegion(const MachineDomTreeNode *Node, + const MachineBasicBlock *MBB, + const MachineLoop *Loop, + const SmallSet<unsigned, 8> &LoopLiveIns) { + unsigned Count = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (LoopLiveIns.count(MOReg)) + Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); + } + ++Count; // Not every iteration due to dbg_value above. + } + + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + for (std::vector<MachineDomTreeNode*>::const_iterator I = + Children.begin(), E = Children.end(); I != E; ++I) { + const MachineDomTreeNode *ChildNode = *I; + MachineBasicBlock *ChildBlock = ChildNode->getBlock(); + if (Loop->contains(ChildBlock)) + VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); + } + } + }; + + /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of + /// MachineInstrs. + class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + const MachineFrameInfo *MFI; + + /// Defs, Uses - Remember where defs and uses of each physical register + /// are as we iterate upward through the instructions. This is allocated + /// here instead of inside BuildSchedGraph to avoid the need for it to be + /// initialized and destructed for each block. + std::vector<std::vector<SUnit *> > Defs; + std::vector<std::vector<SUnit *> > Uses; + + /// DbgValueVec - Remember DBG_VALUEs that refer to a particular + /// register. + std::vector<MachineInstr *>DbgValueVec; + + /// PendingLoads - Remember where unknown loads are after the most recent + /// unknown store, as we iterate. As with Defs and Uses, this is here + /// to minimize construction/destruction. + std::vector<SUnit *> PendingLoads; + + /// LoopRegs - Track which registers are used for loop-carried dependencies. + /// + LoopDependencies LoopRegs; + + /// LoopLiveInRegs - Track which regs are live into a loop, to help guide + /// back-edge-aware scheduling. + /// + SmallSet<unsigned, 8> LoopLiveInRegs; + + public: + MachineBasicBlock::iterator Begin; // The beginning of the range to + // be scheduled. The range extends + // to InsertPos. + unsigned InsertPosIndex; // The index in BB of InsertPos. 
+
+    explicit ScheduleDAGInstrs(MachineFunction &mf,
+                               const MachineLoopInfo &mli,
+                               const MachineDominatorTree &mdt);
+
+    virtual ~ScheduleDAGInstrs() {}
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+    ///
+    SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+      const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+      SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+      assert((Addr == 0 || Addr == &SUnits[0]) &&
+             "SUnits std::vector reallocated on the fly!");
+      SUnits.back().OrigNode = &SUnits.back();
+      return &SUnits.back();
+    }
+
+    /// Run - perform scheduling.
+    ///
+    void Run(MachineBasicBlock *bb,
+             MachineBasicBlock::iterator begin,
+             MachineBasicBlock::iterator end,
+             unsigned endindex);
+
+    /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we
+    /// are given as input.
+    virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU);
+
+    /// ComputeOperandLatency - Override dependence edge latency using
+    /// operand use/def information.
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const;
+
+    virtual MachineBasicBlock *EmitSchedule();
+
+    /// StartBlock - Prepare to perform scheduling in the given block.
+    ///
+    virtual void StartBlock(MachineBasicBlock *BB);
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    /// FinishBlock - Clean up after scheduling in the given block.
+    ///
+    virtual void FinishBlock();
+
+    virtual void dumpNode(const SUnit *SU) const;
+
+    virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+  };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..027f615
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,98 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG*> &GW) { + return G->addCustomGraphFeatures(GW); + } + }; +} + +std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, + const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); +} + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAG::viewGraph() { +// This code is only for debugging! +#ifndef NDEBUG + if (BB->getBasicBlock()) + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + + ":" + BB->getBasicBlock()->getNameStr()); + else + ViewGraph(this, "dag." 
+ MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); +#else + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt b/contrib/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt new file mode 100644 index 0000000..799988a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -0,0 +1,25 @@ +add_llvm_library(LLVMSelectionDAG + DAGCombiner.cpp + FastISel.cpp + FunctionLoweringInfo.cpp + InstrEmitter.cpp + LegalizeDAG.cpp + LegalizeFloatTypes.cpp + LegalizeIntegerTypes.cpp + LegalizeTypes.cpp + LegalizeTypesGeneric.cpp + LegalizeVectorOps.cpp + LegalizeVectorTypes.cpp + ScheduleDAGFast.cpp + ScheduleDAGList.cpp + ScheduleDAGRRList.cpp + ScheduleDAGSDNodes.cpp + SelectionDAG.cpp + SelectionDAGBuilder.cpp + SelectionDAGISel.cpp + SelectionDAGPrinter.cpp + TargetLowering.cpp + TargetSelectionDAGInfo.cpp + ) + +target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 0000000..c9c4d91 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,7222 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. 
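+//
+// Typical examples are algebraic identities such as (add x, 0) -> x and
+// constant folding such as (add 1, 2) -> 3; many of these only become
+// visible once lowering has expanded higher-level constructs into DAG nodes.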
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+
+namespace {
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+                     cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class DAGCombiner {
+    SelectionDAG &DAG;
+    const TargetLowering &TLI;
+    CombineLevel Level;
+    CodeGenOpt::Level OptLevel;
+    bool LegalOperations;
+    bool LegalTypes;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users
+    /// of the instruction to the work list because they might get more
+    /// simplified now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDValue visit(SDNode *N);
+
+  public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+
+    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+                      bool AddTo = true);
+
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+      return CombineTo(N, &Res, 1, AddTo);
+    }
+
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+                      bool AddTo = true) {
+      SDValue To[] = { Res0, Res1 };
+      return CombineTo(N, To, 2, AddTo);
+    }
+
+    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+  private:
+
+    /// SimplifyDemandedBits - Check the specified integer node value to see if
+    /// it can be simplified or if things it uses can be simplified by bit
+    /// propagation. If so, return true.
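+    /// For example, if only the low byte of (or x, 0xFF00) is demanded,
+    /// the OR contributes nothing to the bits that matter and the value
+    /// can be simplified to x.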
+ bool SimplifyDemandedBits(SDValue Op) { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); + + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. + // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue 
ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. + void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const; + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class WorkListRemover : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. 
+  }
+};
+}
+
+//===----------------------------------------------------------------------===//
+//  TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+  ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+                               unsigned Depth = 0) {
+  // No compile time optimizations on this type.
+  if (Op.getValueType() == MVT::ppcf128)
+    return 0;
+
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return 2;
+
+  // Don't allow anything with multiple uses.
+  if (!Op.hasOneUse()) return 0;
+
+  // Don't recurse exponentially.
+  if (Depth > 6) return 0;
+
+  switch (Op.getOpcode()) {
+  default: return 0;
+  case ISD::ConstantFP:
+    // Don't invert constant FP values after legalize. The negated constant
+    // isn't necessarily legal.
+    return LegalOperations ? 0 : 1;
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return V;
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
+    if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return 1;
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    if (HonorSignDependentRoundingFPMath()) return 0;
+
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return V;
+
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+  }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns true, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                    bool LegalOperations, unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+  // Don't allow anything with multiple uses.
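+  // (If the operand had additional uses, the original expression would stay
+  // live anyway, so the "free" negation would in fact cost an extra node.)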
+ assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown code"); + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, Op.getValueType()); + } + case ISD::FADD: + // FIXME: determine better conditions for this xform. + assert(UnsafeFPMath); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), + Op.getOperand(0)); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + assert(UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->getValueAPF().isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && + cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. 
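+// For instance, a one-use (setcc x, y, seteq) can be inverted at no cost by
+// rewriting its condition code to setne, so a user such as an xor-with-1
+// need not materialize a separate inversion.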
+static bool isOneUseSetCC(SDValue N) { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, + SDValue N0, SDValue N1) { + EVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { + if (isa<ConstantSDNode>(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N0.getOperand(1)), + cast<ConstantSDNode>(N1)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } else if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + + if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { + if (isa<ConstantSDNode>(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N1.getOperand(1)), + cast<ConstantSDNode>(N0)); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } else if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N1.getOperand(0), N0); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.1 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + dbgs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!")); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To, &DeadNodes); + + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + return SDValue(N, 0); +} + +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.getNode()); + AddUsersToWorkList(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. 
The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), + &DeadNodes); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, PVT, dl, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1; + if (N0 == N1) + NN1 = NN0; + else { + NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + } + + AddToWorkList(NN0.getNode()); + if (NN1.getNode()) + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. 
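+  // A rough sketch of the intended transform (assuming a target where i16
+  // is undesirable but i32 is legal): (srl i16:x, 4) becomes
+  //   (truncate i16 (srl i32 (zext_in_reg x), 4))
+  // so the shift itself runs in the wider type; see ZExtPromoteOperand
+  // above for how the operand is widened.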
+ unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + DebugLoc dl = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + AddToWorkList(Result.getNode()); + return true; + } + return false; +} + + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= NoIllegalOperations; + LegalTypes = Level >= NoIllegalTypes; + + // Add all the dag nodes to the worklist. + WorkList.reserve(DAG.allnodes_size()); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) + WorkList.push_back(I); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // The root of the dag may dangle to deleted nodes until the dag combiner is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // while the worklist isn't empty, inspect the node on the end of it and + // try and combine it. + while (!WorkList.empty()) { + SDNode *N = WorkList.back(); + WorkList.pop_back(); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (N->use_empty() && N != &Dummy) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + DAG.DeleteNode(N); + continue; + } + + SDValue RV = combine(N); + + if (RV.getNode() == 0) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. 
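+    // SimplifyNodeWithTwoResults below is one such case: it rewires both
+    // results of e.g. an SMUL_LOHI via CombineTo and then returns the
+    // original node.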
+ if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getNode()->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DEBUG(dbgs() << "\nReplacing.3 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + RV.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + SDValue OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + } + + // Push the new node and any users onto the worklist + AddToWorkList(RV.getNode()); + AddUsersToWorkList(RV.getNode()); + + // Add any uses of the old node to the worklist in case this node is the + // last one that uses them. They may become dead after this node is + // deleted. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + } + + // If the root changed (e.g. it was a dead load, update the root). + DAG.setRoot(Dummy.getValue()); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case 
ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.getNode() == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If nothing happened still, try promoting the operation. + if (RV.getNode() == 0) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + + // If N is a commutative binary node, try commuting it to enable more + // sdisel CSE. + if (RV.getNode() == 0 && + SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. + if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { + SDValue Ops[] = { N1, N0 }; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), + Ops, 2); + if (CSENode) + return SDValue(CSENode, 0); + } + } + + return RV; +} + +/// getInputChainForNode - Given a node, return its input chain if it has one, +/// otherwise return a null sd operand. +static SDValue getInputChainForNode(SDNode *N) { + if (unsigned NumOps = N->getNumOperands()) { + if (N->getOperand(0).getValueType() == MVT::Other) + return N->getOperand(0); + else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + return N->getOperand(NumOps-1); + for (unsigned i = 1; i < NumOps-1; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + return N->getOperand(i); + } + return SDValue(); +} + +SDValue DAGCombiner::visitTokenFactor(SDNode *N) { + // If N has two operands, where one has an input chain equal to the other, + // the 'other' chain is redundant. 
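+  // For example, in TokenFactor(Ld, Ch) where Ld is the chain result of a
+  // load whose input chain is Ch, the load already orders after Ch, so the
+  // token factor can simply be replaced by Ld.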
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
+  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;           // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors. The TFs list grows as new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDValue Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if (Op.hasOneUse() &&
+            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.getNode());
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.getNode());
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDValue Result;
+
+  // If we've changed things around then replace the token factor.
+  if (Changed) {
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                           MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+  WorkListRemover DeadNodes(*this);
+  // Replacing results may cause a different MERGE_VALUES to suddenly
+  // be CSE'd with N, and carry its uses with it. Iterate until no
+  // uses remain, to ensure that the node can be safely deleted.
+  do {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+                                    &DeadNodes);
+  } while (!N->use_empty());
+  removeFromWorkList(N);
+  DAG.DeleteNode(N);
+  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+} + +static +SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, + SelectionDAG &DAG) { + EVT VT = N0.getValueType(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); + + if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && + isa<ConstantSDNode>(N00.getOperand(1))) { + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N00.getOperand(0), N01), + DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + N00.getOperand(1), N01)); + return DAG.getNode(ISD::ADD, DL, VT, N0, N1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (add x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (add c1, c2) -> c1+c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + // fold (add x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && + GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + GA->getOffset() + + (uint64_t)N1C->getSExtValue()); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), VT), + N0.getOperand(1)); + // reassociate add + SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + if (RADD.getNode() != 0) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && + cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && + cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(1)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to 
(B+or-C) + if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && + N1.getOperand(0).getOpcode() == ISD::SUB && + N0 == N1.getOperand(0).getOperand(1)) + return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(0), N1.getOperand(1)); + + // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), + DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + } + + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (a+b) -> (a|b) iff a and b share no bits. + if (VT.isInteger() && !VT.isVector()) { + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + } + } + + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + if (Result.getNode()) return Result; + } + + // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) + if (N1.getOpcode() == ISD::SHL && + N1.getOperand(0).getOpcode() == ISD::SUB) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) + if (C->getAPIntValue() == 0) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && + N0.getOperand(0).getOpcode() == ISD::SUB) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) + if (C->getAPIntValue() == 0) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); + + return SDValue(); +} + +SDValue DAGCombiner::visitADDC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (N->hasNUsesOfValue(0, 1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // canonicalize constant to RHS. 
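+  // e.g. (addc 1, x) becomes (addc x, 1); N->getVTList() is used so that
+  // both the sum and the carry result are preserved.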
+ if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, N->getValueType(0)); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + if (N0C && N0C->isAllOnesValue()) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If the relocation model supports it, consider symbol offsets. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + VT); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { + unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. 
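+    // e.g. for (mul x, -8): Log2Val is 3 and the node built below is
+    // (sub 0, (shl x, 3)).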
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(Log2Val, getShiftAmountTy()))); + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(1))) { + SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1, N0.getOperand(1)); + AddToWorkList(C3.getNode()); + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). + if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + N0.getOperand(1), N1)); + + // reassociate mul + SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + if (RMUL.getNode() != 0) + return RMUL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + EVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSExtValue() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + N0, N1); + } + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSExtValue()) || + isPowerOf2_64(-N1C->getSExtValue()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDValue(); + + int64_t pow2 = N1C->getSExtValue(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + unsigned lg2 = Log2_64(abs2); + + // Splat the sign bit into the register + SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getConstant(VT.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(SGN.getNode()); + + // Add (N0 < 0) ? 
abs2 - 1 : 0; + SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + DAG.getConstant(VT.getSizeInBits() - lg2, + getShiftAmountTy())); + SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + AddToWorkList(SRL.getNode()); + AddToWorkList(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + DAG.getConstant(lg2, getShiftAmountTy())); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + + AddToWorkList(SRA.getNode()); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), SRA); + } + + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. + if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && + !TLI.isIntDivCheap()) { + SDValue Op = BuildSDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + EVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + ADDVT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildUDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitSREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
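+  // e.g. (srem x, 8) can become (sub x, (mul (sdiv x, 8), 8)), which pays
+  // off once the sdiv below is combined into a shift sequence.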
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + VT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + } + } + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
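+  // e.g. for (urem x, 10) the udiv built below may be rewritten by
+  // BuildUDIV into a multiply by a magic constant, making the resulting
+  // multiply/subtract sequence cheaper than a hardware udiv.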
+  if (N1C && !N1C->isNullValue()) {
+    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+    AddToWorkList(Div.getNode());
+    SDValue OptimizedDiv = combine(Div.getNode());
+    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                OptimizedDiv, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+      AddToWorkList(Mul.getNode());
+      return Sub;
+    }
+  }
+
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (mulhs x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhs x, 1) -> (sra x, size(x)-1)
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+                                       getShiftAmountTy()));
+  // fold (mulhs x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (mulhu x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhu x, 1) -> 0
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getConstant(0, N0.getValueType());
+  // fold (mulhu x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Returns the combined node, or a null SDValue if
+/// no simplification was made.
+///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+                                                unsigned HiOp) {
+  // If the high half is not needed, just compute the low half.
+  bool HiExists = N->hasAnyUseOfValue(1);
+  if (!HiExists &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+                              N->op_begin(), N->getNumOperands());
+    return CombineTo(N, Res, Res);
+  }
+
+  // If the low half is not needed, just compute the high half.
+  bool LoExists = N->hasAnyUseOfValue(0);
+  if (!LoExists &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+                              N->op_begin(), N->getNumOperands());
+    return CombineTo(N, Res, Res);
+  }
+
+  // If both halves are used, leave the node as it is.
+  if (LoExists && HiExists)
+    return SDValue();
+
+  // If the two computed results can be simplified separately, separate them.
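+  // For example, if only the low result of an (smul_lohi x, y) is used but
+  // plain MUL is not legal, the MUL node formed below is kept only if it
+  // combines into a different, legal node.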
+ if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // Bail early if none of these transforms apply. + if (N0.getNode()->getNumOperands() == 0) return SDValue(); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. + EVT Op0VT = N0.getOperand(0).getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && + !VT.isVector() && + Op0VT == N1.getOperand(0).getValueType() && + (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ORNode, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N1.getValueType(); + unsigned BitWidth = VT.getScalarType().getSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, VT); + // reassociate and + SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + if (RAND.getNode() != 0) + return RAND; + // fold (and (or x, C), D) -> D if (C & D) == D + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; + LoadSDNode *LN0 = HasAnyExt + ? cast<LoadSDNode>(N0.getOperand(0)) + : cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT LoadedVT = LN0->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(LN0, NewLoad, NewLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // Do not change the width of a volatile load. + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = LN0->getOperand(1).getValueType(); + + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + // For big endian targets, we need to add an offset to the pointer + // to load the correct bytes. For little endian systems, we merely + // need to read fewer bytes from the same pointer. + if (TLI.isBigEndian()) { + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorkList(NewPtr.getNode()); + + EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+ LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) != 0.
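+ // e.g. (or (and x, 0xFF), 0x0F) -> (and (or x, 0x0F), 0xFF). The identity
+ // holds for any constants; the overlap check below just keeps the combine
+ // to cases where it is likely to pay off.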
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ // Simplify the operands using demanded-bits information.
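+ // e.g. if only the low byte of the result is demanded, (or x, 0xFF00)
+ // can be replaced by x outright.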
+ if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + EVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. + if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = VT.getSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDValue Rot; + if (HasROTL) + Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); + else + Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); + Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); + Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. 
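+ // (With constant shift amounts the mask can be reapplied to the rotate
+ // result, as above; with a variable amount the masked-off bit positions
+ // are unknowable at compile time, so we conservatively give up.)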
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTR)
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign/zext/any-extended or truncate cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + if (RXOR.getNode() != 0) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + + if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { + switch (N0.getOpcode()) { + default: + llvm_unreachable("Unhandled SetCC Equivalent!"); + case ISD::SETCC: + return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (N1C && N1C->isAllOnesValue() && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) + if (N1C && N0.getOpcode() == ISD::XOR) { + ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N00C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + DAG.getConstant(N1C->getAPIntValue() ^ + N00C->getAPIntValue(), VT)); + if (N01C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(N1C->getAPIntValue() ^ + N01C->getAPIntValue(), VT)); + } + // fold (xor x, x) -> 0 + if (N0 == N1) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // Simplify the expression using non-local knowledge. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// visitShiftByConstant - Handle transforms common to the three shifts, when +/// the shift amount is a constant. +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { + SDNode *LHS = N->getOperand(0).getNode(); + if (!LHS->hasOneUse()) return SDValue(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDValue(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. + break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDValue(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant as well. + ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. 
+ // If it is not a shift, it pessimizes some common cases like: + // + // void foo(int *X, int i) { X[i & 1235] = 1; } + // int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + return SDValue(); + + EVT VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified by the + // logical operation, do not perform the transformation. The highBitSet + // boolean indicates the value of the high bit of the constant which would + // cause it to be modified for this operation. + if (N->getOpcode() == ISD::SRA) { + bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); + if (BinOpRHSSignSet != HighBitSet) + return SDValue(); + } + + // Fold the constants, shifting the binop RHS by the shift amount. + SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + N->getValueType(0), + LHS->getOperand(1), N->getOperand(1)); + + // Create the new shift. + SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + VT, LHS->getOperand(0), N->getOperand(1)); + + // Create the new binop. + return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); +} + +SDValue DAGCombiner::visitSHL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + + // fold (shl c1, c2) -> c1<<c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + // fold (shl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (shl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (shl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, VT); + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
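+ // (Pushing the truncate inward lets the AND happen in the narrower
+ // shift-amount type, where it can often combine away.)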
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 > OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+ // (srl (and x, (shl -1, c1)), (sub c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1),
+ VT);
+ SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+ N0.getOperand(0),
+ HiBitsMask);
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ HiBitsMask);
+ }
+
+ if (N1C) {
+ SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSHL.getNode())
+ return NewSHL;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (sra c1, c2) -> c1 >>s c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+ // sext_inreg.
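+ // e.g. on i32, (sra (shl x, 24), 24) -> (sext_inreg x, i8).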
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target, sext(shl) is likely to result in
+ // better code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the truncate-to type is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+
+ // If the sign bit is known to be zero, switch this to a SRL.
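+ // (sra and srl agree whenever only zero bits are shifted in, and srl is
+ // the preferred canonical form.)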
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 > OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
+
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), log2(size(x))) -> x iff x has one bit set (the low
+ // bit).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
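+ // e.g. for i32: (srl (ctlz x), 5) is 1 iff x == 0, so a known-zero input
+ // constant-folds to 1 here.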
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy()));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of the SRL is optimized into an AND,
+ // the SRL itself may not be optimized further. Look for it and add the
+ // BRCOND into the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + // fold (select true, X, Y) -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold (select false, X, Y) -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + if (VT.isInteger() && + (VT0 == MVT::i1 || + (VT0.isInteger() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) && + N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + SDValue XORNode; + if (VT == VT0) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + AddToWorkList(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + } + // fold (select C, X, 1) -> (or (not C), X) + if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + } + // fold (select C, X, 0) -> (and C, X) + if (VT == MVT::i1 && N2C && N2C->isNullValue()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or X, Y) + if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select X, Y, X) -> (and X, Y) + // fold (select X, Y, 0) -> (and X, Y) + if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + + // If we can fold this based on the true/false value, do so. 
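+ // (SimplifySelectOps can, for example, turn a select between two loads
+ // that differ only in address into a single load of a selected address.)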
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant.
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Tries to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extension is possible and the above
+// transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sext_inreg x).
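+ // e.g. with x:i32, (sext (truncate x to i16) to i32) -> (sext_inreg x, i16).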
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (OpBits < DestBits) + Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + else if (OpBits > DestBits) + Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + if (N0.getOpcode() == ISD::SETCC) { + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + else { + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. + if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (zext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE && + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + SDValue Op = N0.getOperand(0); + if (Op.getValueType().bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + } else if (Op.getValueType().bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + N0.getValueType().getScalarType()); + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. 
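+ // e.g. with x:i64, (zext (and (trunc x to i32), 255) to i64) -> (and x, 255),
+ // with the mask zero-extended to i64.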
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + if (N0.getOpcode() == ISD::SETCC) { + if (!LegalOperations && VT.isVector()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // Only do this before legalize for now. 
+ EVT N0VT = N0.getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), + DAG.getConstant(1, EltVT)); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) { + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } else { + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SHL) { + // If the original shl may be shifting out bits, do not perform this + // transformation. 
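+      // e.g. if x was zero-extended from i8 to i16, the 8 high bits are the
+      // only known-zero bits; a shift amount greater than 8 would push
+      // meaningful bits of x out of the i16 value, so we must bail out.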
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - + N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); + if (ShAmt > KnownZeroBits) + return SDValue(); + } + DebugLoc dl = N->getDebugLoc(); + return DAG.getNode(N0.getOpcode(), dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), + DAG.getNode(ISD::ZERO_EXTEND, dl, + N0.getOperand(1).getValueType(), + N0.getOperand(1))); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. + if (TruncOp.getValueType().bitsGT(VT)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + } + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. 
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, + N->getDebugLoc(), + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + if (N0.getOpcode() == ISD::SETCC) { + // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
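+      // e.g. a v4i32 result of comparing two v4f32 operands: both types are
+      // 128 bits wide with four lanes, so the vsetcc can produce the result
+      // type directly.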
+      if (VT.getSizeInBits() == N0VT.getSizeInBits())
+        return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                             N0.getOperand(1),
+                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
+      // If the desired elements are smaller or larger than the source
+      // elements we can use a matching integer vector type and then
+      // truncate/sign extend
+      else {
+        EVT MatchingElementType =
+          EVT::getIntegerVT(*DAG.getContext(),
+                            N0VT.getScalarType().getSizeInBits());
+        EVT MatchingVectorType =
+          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                           N0VT.getVectorNumElements());
+        SDValue VsetCC =
+          DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+                        N0.getOperand(1),
+                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
+        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+      }
+    }
+
+    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode())
+      return SCC;
+  }
+
+  return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used.  If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+  switch (V.getOpcode()) {
+  default: break;
+  case ISD::OR:
+  case ISD::XOR:
+    // If the LHS or RHS don't contribute bits to the or, drop them.
+    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+      return V.getOperand(1);
+    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+      return V.getOperand(0);
+    break;
+  case ISD::SRL:
+    // Only look at single-use SRLs.
+    if (!V.getNode()->hasOneUse())
+      break;
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+      // See if we can recursively simplify the LHS.
+      unsigned Amt = RHSC->getZExtValue();
+
+      // Watch out for shift count overflow though.
+      if (Amt >= Mask.getBitWidth()) break;
+      APInt NewMask = Mask << Amt;
+      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+      if (SimplifyLHS.getNode())
+        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+                           SimplifyLHS, V.getOperand(1));
+    }
+  }
+  return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N bits
+/// and then truncated to a narrower type, where N is a multiple of the number
+/// of bits of the narrower type, transform it to a narrower load from
+/// address + N / (num bits of new type).  If the result is to be extended,
+/// also fold the extension to form an extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = VT;
+
+  // This transformation isn't valid for vector loads.
+  if (VT.isVector())
+    return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+  // extending to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
+      return SDValue();
+  } else if (Opc == ISD::SRL) {
+    // Another special-case: SRL is basically zero-extending a narrower
+    // value.
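+    // For instance (a sketch of one case this handles): (srl (load i32 %p),
+    // 16) only uses the upper half of the loaded value, so it can become a
+    // zero-extending i16 load from %p plus 2 bytes on a little-endian target.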
+ ExtType = ISD::ZEXTLOAD; + N0 = SDValue(N, 0); + ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01) return SDValue(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - N01->getZExtValue()); + } + + unsigned EVTBits = ExtVT.getSizeInBits(); + unsigned ShAmt = 0; + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShAmt = N01->getZExtValue(); + // Is the shift amount a multiple of size of VT? + if ((ShAmt & (EVTBits-1)) == 0) { + N0 = N0.getOperand(0); + // Is the load width a multiple of size of VT? + if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) + return SDValue(); + } + } + } + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && + // Do not change the width of a volatile load. + !cast<LoadSDNode>(N0)->isVolatile()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load = (ExtType == ISD::NON_EXTLOAD) + ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) + : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); + + // Return the new loaded value. + return Load; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N1)->getVT(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + + // fold (sext_in_reg c1) -> c1 + if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + + // If the input is already sign extended, just drop the extension. + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + } + + // fold (sext_in_reg (sext x)) -> (sext x) + // fold (sext_in_reg (aext x)) -> (sext x) + // if x is small enough. 
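+  // e.g. (sext_in_reg (aext i8 x to i32), i8) is just (sext i8 x to i32):
+  // the in-register extension replicates bit 7, which is exactly what a
+  // plain sign extension of the i8 value produces.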
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. + unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // noop truncate
+  if (N0.getValueType() == N->getValueType(0))
+    return N0;
+  // fold (truncate c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    if (N0.getOperand(0).getValueType().bitsLT(VT))
+      // if the source is smaller than the dest, we still need an extend
+      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                         N0.getOperand(0));
+    else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+    else
+      // if the source and dest are the same type, we can drop both the extend
+      // and the truncate.
+      return N0.getOperand(0);
+  }
+
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used.  For example "trunc (or (shl x, 8), y)"
+  // -> trunc y
+  SDValue Shorter =
+    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+                                             VT.getSizeInBits()));
+  if (Shorter.getNode())
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+    SDValue Reduced = ReduceLoadWidth(N);
+    if (Reduced.getNode())
+      return Reduced;
+  }
+
+  // Simplify the operands using demanded-bits information.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDValue Elt = N->getOperand(i);
+  if (Elt.getOpcode() != ISD::MERGE_VALUES)
+    return Elt.getNode();
+  return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+  assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+    return SDValue();
+  EVT LD1VT = LD1->getValueType(0);
+
+  if (ISD::isNON_EXTLoad(LD2) &&
+      LD2->hasOneUse() &&
+      // If both are volatile this would reduce the number of volatile loads.
+      // If one is volatile it might be ok, but play conservative and bail out.
+ !LD1->isVolatile() && + !LD2->isVolatile() && + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned Align = LD1->getAlignment(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + LD1->getBasePtr(), LD1->getSrcValue(), + LD1->getSrcValueOffset(), false, false, Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. + if (!LegalTypes && + N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + VT.isVector()) { + bool isSimple = true; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) + if (N0.getOperand(i).getOpcode() != ISD::UNDEF && + N0.getOperand(i).getOpcode() != ISD::Constant && + N0.getOperand(i).getOpcode() != ISD::ConstantFP) { + isSimple = false; + break; + } + + EVT DestEltVT = N->getValueType(0).getVectorElementType(); + assert(!DestEltVT.isVector() && + "Element type of vector ValueType must not be vector!"); + if (isSimple) + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + } + + // If the input is a constant, let getNode fold it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. + DAG.DeleteNode(Res.getNode()); + } + } + + // (conv (conv x, t1), t2) -> (conv x, t2) + if (N0.getOpcode() == ISD::BIT_CONVERT) + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (conv (load x)) -> (load (conv*)x) + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + // Do not change the width of a volatile load. + !cast<LoadSDNode>(N0)->isVolatile() && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned OrigAlign = LN0->getAlignment(); + + if (Align <= OrigAlign) { + SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), LN0->isNonTemporal(), + OrigAlign); + AddToWorkList(N); + CombineTo(N0.getNode(), + DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + N0.getValueType(), Load), + Load.getValue(1)); + return Load; + } + } + + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // This often reduces constant pool loads. 
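+  // e.g. for an f32 reinterpreted as i32 the sign bit is 0x80000000, so the
+  // fneg becomes (xor x, 0x80000000) and the fabs becomes (and x, 0x7fffffff).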
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { + SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + N0.getOperand(0)); + AddToWorkList(NewConv.getNode()); + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(SignBit, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(~SignBit, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); + if (isTypeLegal(IntXVT)) { + SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + IntXVT, N0.getOperand(1)); + AddToWorkList(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. + X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + AddToWorkList(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X, DAG.getConstant(SignBit, VT)); + AddToWorkList(X.getNode()); + + SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst, DAG.getConstant(~SignBit, VT)); + AddToWorkList(Cst.getNode()); + + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. + if (N0.getOpcode() == ISD::BUILD_PAIR) { + SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); + if (CombineLD.getNode()) + return CombineLD; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { + EVT VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + +/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the +/// destination element value type. +SDValue DAGCombiner:: +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDValue(BV, 0); + + unsigned SrcBitSize = SrcEltVT.getSizeInBits(); + unsigned DstBitSize = DstEltVT.getSizeInBits(); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. 
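+  // e.g. a constant v4f32 becomes a v4i32 by bitconverting each element
+  // individually, so 1.0f turns into the i32 constant 0x3f800000.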
+  if (SrcBitSize == DstBitSize) {
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                              BV->getValueType(0).getVectorNumElements());
+
+    // Due to the FP element handling below calling this routine recursively,
+    // we can end up with a scalar-to-vector node here.
+    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                         DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+                                     DstEltVT, BV->getOperand(0)));
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+      SDValue Op = BV->getOperand(i);
+      // If the vector element type is not legal, the BUILD_VECTOR operands
+      // are promoted and implicitly truncated.  Make that explicit here.
+      if (Op.getValueType() != SrcEltVT)
+        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+                                DstEltVT, Op));
+      AddToWorkList(Ops.back().getNode());
+    }
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Otherwise, we're growing or shrinking the elements.  To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them to
+  // int first.
+  if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector where the elements are
+    // the same size.
+    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+    SrcEltVT = IntVT;
+  }
+
+  // Now we know the input is an integer vector.  If the output is a FP type,
+  // convert to integer first, then to FP of the right size.
+  if (DstEltVT.isFloatingPoint()) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+
+  // Okay, we know the src/dst types are both integers of differing types.
+  // Handle growing first.
+  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      bool isLE = TLI.isLittleEndian();
+      APInt NewBits = APInt(DstBitSize, 0);
+      bool EltIsUndef = true;
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+
+        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
+      }
+
+      if (EltIsUndef)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
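+  // e.g. when going from v2i64 to v4i32, each i64 constant splits into two
+  // i32 pieces; the loop below emits the low piece first and reverses each
+  // pair on big-endian targets.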
+  bool isS2V = ISD::isScalarToVector(BV);
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                            NumOutputsPerInput*BV->getNumOperands());
+  SmallVector<SDValue, 8> Ops;
+
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      continue;
+    }
+
+    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+                        getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                           Ops[0]);
+      OpVal = OpVal.lshr(DstBitSize);
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
+    if (TLI.isBigEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                     &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (fadd A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fadd A, (fneg B)) -> (fsub A, B)
+  if (isNegatibleForFree(N1, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+  // fold (fadd (fneg A), B) -> (fsub B, A)
+  if (isNegatibleForFree(N0, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+                       GetNegatedExpression(N0, DAG, LegalOperations));
+
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+  // fold (fsub A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fsub 0, B) -> -B
+  if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+    if (isNegatibleForFree(N1, LegalOperations))
+      return GetNegatedExpression(N1, DAG, LegalOperations);
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+  }
+  // fold (fsub A, (fneg B)) -> (fadd A, B)
+  if (isNegatibleForFree(N1, LegalOperations))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+  // fold (fmul A, 0) -> 0
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N1;
+  // fold (fmul A, 0) -> 0, vector edition.
+  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+    return N1;
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+  if (N1CFP && N1CFP->isExactlyValue(-1.0))
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fdiv c1, c2) -> c1/c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+
+  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
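+      // (A return value of 2 from isNegatibleForFree indicates the negated
+      // form is strictly cheaper, e.g. it deletes an explicit FNEG, so the
+      // check below ensures the double negation is a net win.)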
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + + if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, N1.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + // fold (sint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and SINT_TO_FP is not legal on this target, + // but UINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 
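+    // e.g. if N0 is (and x, 0x7fffffff) on i32, the sign bit is provably
+    // clear, so the signed and unsigned conversions produce the same value.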
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128)
+    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+  // but SINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP && N0.getValueType() != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+
+  // fold (fp_round (fp_round x)) -> (fp_round x)
+  if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value preserving truncation if both rounds are.
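+    // The second operand of FP_ROUND is 1 when the rounding is known not to
+    // change the value; the combined round below is marked exact only when
+    // both of the original rounds were.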
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 && + N0.getNode()->getConstantOperandVal(1) == 1; + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getIntPtrConstant(IsTrunc)); + } + + // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(Tmp.getNode()); + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + Tmp, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + + // fold (fp_round_inreg c1fp) -> c1fp + if (N0CFP && isTypeLegal(EVT)) { + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::FP_ROUND) + return SDValue(); + + // fold (fp_extend c1fp) -> c1fp + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the + // value of X. + if (N0.getOpcode() == ISD::FP_ROUND + && N0.getNode()->getConstantOperandVal(1) == 1) { + SDValue In = N0.getOperand(0); + if (In.getValueType() == VT) return In; + if (VT.bitsLT(In.getValueType())) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + In, N0.getOperand(1)); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + } + + // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFNEG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (isNegatibleForFree(N0, LegalOperations)) + return GetNegatedExpression(N0, DAG, LegalOperations); + + // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // constant pool values. 
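+  // e.g. an f64 carried in an i64 register can be negated by XORing bit 63
+  // with an immediate mask, instead of loading a sign mask from the
+  // constant pool.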
+ if (N0.getOpcode() == ISD::BIT_CONVERT && + !VT.isVector() && + N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + VT, Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + + // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // constant pool values. + if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBRCOND(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal + // on the target. + if (N1.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + Chain, N1.getOperand(2), + N1.getOperand(0), N1.getOperand(1), N2); + } + + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { + // Look past truncate. + Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } + + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { + // Match this pattern so that we can generate simpler code: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and i32 %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // This applies only when the AND constant value has one bit set and the + // SRL constant is equal to the log2 of the AND constant. The back-end is + // smart enough to convert the result into a TEST/JMP sequence. 
+ SDValue Op0 = N1.getOperand(0); + SDValue Op1 = N1.getOperand(1); + + if (Op0.getOpcode() == ISD::AND && + Op1.getOpcode() == ISD::Constant) { + SDValue AndOp1 = Op0.getOperand(1); + + if (AndOp1.getOpcode() == ISD::Constant) { + const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + + if (AndConst.isPowerOf2() && + cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDValue SetCC = + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETNE); + + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + // Don't add the new BRCond into the worklist or else SimplifySelectCC + // will convert it back to (X & C1) >> C2. + CombineTo(N, NewBRCond, false); + // Truncate is dead. + if (Trunc) { + removeFromWorkList(Trunc); + DAG.DeleteNode(Trunc); + } + // Replace the uses of SRL with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + } + + // Transform br(xor(x, y)) -> br(x != y) + // Transform br(xor(xor(x,y), 1)) -> br (x == y) + if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { + SDNode *TheXor = N1.getNode(); + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() == Op1.getOpcode()) { + // Avoid missing important xor optimizations. + SDValue Tmp = visitXOR(TheXor); + if (Tmp.getNode() && Tmp.getNode() != TheXor) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) + if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1; + + EVT SetCCVT = NodeToReplace.getValueType(); + if (LegalTypes) + SetCCVT = TLI.getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes); + removeFromWorkList(NodeToReplace.getNode()); + DAG.DeleteNode(NodeToReplace.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. 
+ + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + false); + if (Simp.getNode()) AddToWorkList(Simp.getNode()); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// CombineToPreIndexedLoadStore - Try turning a load / store into a +/// pre-indexed load / store when the base pointer is an add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + return false; + } + + // Now check for #3 and #4. 
+ bool RealUse = false; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == N) + continue; + if (Use->isPredecessorOf(N)) + return false; + + if (!((Use->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || + (Use->getOpcode() == ISD::STORE && + cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.4 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Ptr with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Ptr.getNode()); + DAG.DeleteNode(Ptr.getNode()); + + return true; +} + +/// CombineToPostIndexedLoadStore - Try to combine a load / store with a +/// add / sub of the base pointer node into a post-indexed load / store. +/// The transformation folded the add / subtract into the new indexed +/// load / store effectively and all of its uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + if (Ptr.getNode()->hasOneUse()) + return false; + + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Op = *I; + if (Op == N || + (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) + continue; + + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + continue; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr. + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + continue; + + // Check for #1. 
+ bool TryNext = false; + for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), + EE = BasePtr.getNode()->use_end(); II != EE; ++II) { + SDNode *Use = *II; + if (Use == Ptr.getNode()) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode::use_iterator III = Use->use_begin(), + EEE = Use->use_end(); III != EEE; ++III) { + SDNode *UseUse = *III; + if (!((UseUse->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || + (UseUse->getOpcode() == ISD::STORE && + cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + SDValue Result = isLoad + ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.5 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Op); + DAG.DeleteNode(Op); + return true; + } + } + } + + return false; +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (N->hasNUsesOfValue(0, 0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. + DEBUG(dbgs() << "\nReplacing.6 "; + N->dump(&DAG); + dbgs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + dbgs() << "\n"); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); + + if (N->use_empty()) { + removeFromWorkList(N); + DAG.DeleteNode(N); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } else { + // Indexed loads. 
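+      // An indexed load produces three results: value 0 is the loaded
+      // value, value 1 is the updated base pointer, and value 2 is the
+      // output chain, which is what the assert below checks.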
+      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+        DEBUG(dbgs() << "\nReplacing.7 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Undef.getNode()->dump(&DAG);
+              dbgs() << " and 2 other values\n");
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+                                      DAG.getUNDEF(N->getValueType(1)),
+                                      &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+        removeFromWorkList(N);
+        DAG.DeleteNode(N);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
+  // If this load is directly stored, replace the load value with the stored
+  // value.
+  // TODO: Handle store large -> read small portion.
+  // TODO: Handle TRUNCSTORE/LOADEXT
+  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+      !LD->isVolatile()) {
+    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+      if (PrevST->getBasePtr() == Ptr &&
+          PrevST->getValue().getValueType() == N->getValueType(0))
+        return CombineTo(N, Chain.getOperand(1), Chain);
+    }
+  }
+
+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
+                              N->getDebugLoc(),
+                              Chain, Ptr, LD->getSrcValue(),
+                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
+    }
+  }
+
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDValue BetterChain = FindBetterChain(N, Chain);
+
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDValue ReplLoad;
+
+      // Replace the chain to avoid dependency.
+      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+                               BetterChain, Ptr,
+                               LD->getSrcValue(), LD->getSrcValueOffset(),
+                               LD->isVolatile(), LD->isNonTemporal(),
+                               LD->getAlignment());
+      } else {
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
+                                  LD->getDebugLoc(),
+                                  BetterChain, Ptr, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(),
+                                  LD->getMemoryVT(),
+                                  LD->isVolatile(),
+                                  LD->isNonTemporal(),
+                                  LD->getAlignment());
+      }
+
+      // Create token factor to keep old chain connected.
+      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                                  MVT::Other, Chain, ReplLoad.getValue(1));
+
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+
+      // Replace uses with load result and token factor. Don't add users
+      // to work list.
+      return CombineTo(N, ReplLoad.getValue(0), Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed load.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load has specific bytes cleared out.  If so, return the byte size being
+/// masked out and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+  std::pair<unsigned, unsigned> Result(0, 0);
+
+  // Check for the structure we're looking for.
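+  // For example, given the i32 value "(and (load p), 0xFFFF00FF)", the
+  // inverted mask is 0x0000FF00: exactly one byte, at byte offset 1 from
+  // the lsb, is being cleared, so we return {1, 1}.  Endianness is handled
+  // later, when the narrow store is built.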
+  if (V->getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(V->getOperand(1)) ||
+      !ISD::isNormalLoad(V->getOperand(0).getNode()))
+    return Result;
+
+  // Check the chain and pointer.
+  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
+
+  // The store should be chained directly to the load or be an operand of a
+  // tokenfactor.
+  if (LD == Chain.getNode())
+    ; // ok.
+  else if (Chain->getOpcode() != ISD::TokenFactor)
+    return Result; // Fail.
+  else {
+    bool isOk = false;
+    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+      if (Chain->getOperand(i).getNode() == LD) {
+        isOk = true;
+        break;
+      }
+    if (!isOk) return Result;
+  }
+
+  // This only handles simple types.
+  if (V.getValueType() != MVT::i16 &&
+      V.getValueType() != MVT::i32 &&
+      V.getValueType() != MVT::i64)
+    return Result;
+
+  // Check the constant mask.  Invert it so that the bits being masked out are
+  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
+  // follow the sign bit for uniformity.
+  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
+  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
+  if (NotMaskLZ == 64) return Result;  // All zero mask.
+
+  // See if we have a contiguous run of bits.  If so, we have 0*1+0*.
+  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+    return Result;
+
+  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+  if (V.getValueType() != MVT::i64 && NotMaskLZ)
+    NotMaskLZ -= 64-V.getValueSizeInBits();
+
+  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+  switch (MaskedBytes) {
+  case 1:
+  case 2:
+  case 4: break;
+  default: return Result; // All one mask, or 5-byte mask.
+  }
+
+  // Verify that the masked region starts at a byte offset that is a multiple
+  // of its width, so that the narrow access is aligned the same as its width.
+  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+  Result.first = MaskedBytes;
+  Result.second = NotMaskTZ/8;
+  return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo.  If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+                                SDValue IVal, StoreSDNode *St,
+                                DAGCombiner *DC) {
+  unsigned NumBytes = MaskInfo.first;
+  unsigned ByteShift = MaskInfo.second;
+  SelectionDAG &DAG = DC->getDAG();
+
+  // Check to see if IVal is all zeros in the part being masked in by the 'or'
+  // that uses this.  If not, this is not a replacement.
+  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+                                  ByteShift*8, (ByteShift+NumBytes)*8);
+  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+  // Check that it is legal on the target to do this.  It is legal if the new
+  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+  // legalization.
+  MVT VT = MVT::getIntegerVT(NumBytes*8);
+  if (!DC->isTypeLegal(VT))
+    return 0;
+
+  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
+  // shifted by ByteShift and truncated down to NumBytes.
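+  // Continuing the example from CheckForMaskedLoad (MaskInfo = {1, 1}): IVal
+  // is shifted right by 8, truncated to i8, and stored at byte offset 1 from
+  // the original pointer on a little-endian target (the offset is mirrored
+  // for big-endian).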
+  if (ByteShift)
+    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+
+  // Figure out the offset for the store and the alignment of the access.
+  unsigned StOffset;
+  unsigned NewAlign = St->getAlignment();
+
+  if (DAG.getTargetLoweringInfo().isLittleEndian())
+    StOffset = ByteShift;
+  else
+    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+  SDValue Ptr = St->getBasePtr();
+  if (StOffset) {
+    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+    NewAlign = MinAlign(NewAlign, StOffset);
+  }
+
+  // Truncate down to the new size.
+  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+  ++OpsNarrowed;
+  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+                      false, false, NewAlign).getNode();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates.  If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  if (ST->isVolatile())
+    return SDValue();
+
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr   = ST->getBasePtr();
+  EVT VT = Value.getValueType();
+
+  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+    return SDValue();
+
+  unsigned Opc = Value.getOpcode();
+
+  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+  // is a byte mask indicating a consecutive number of bytes, check to see if
+  // Y is known to provide just those bytes.  If so, we try to replace the
+  // load + replace + store sequence with a single (narrower) store, which makes
+  // the load dead.
+  if (Opc == ISD::OR) {
+    std::pair<unsigned, unsigned> MaskedLoad;
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                 Value.getOperand(1), ST, this))
+        return SDValue(NewST, 0);
+
+    // Or is commutative, so try swapping X and Y.
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                 Value.getOperand(0), ST, this))
+        return SDValue(NewST, 0);
+  }
+
+  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+      Value.getOperand(1).getOpcode() != ISD::Constant)
+    return SDValue();
+
+  SDValue N0 = Value.getOperand(0);
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      Chain == SDValue(N0.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(N0);
+    if (LD->getBasePtr() != Ptr)
+      return SDValue();
+
+    // Find the type to which to narrow the load / op / store.
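+    // For example, "store (or (load p), 0x00FF0000), p" on a little-endian
+    // target only changes byte 2 of the value in memory, so the sequence can
+    // be narrowed to an i8 load / or / store at p+2 (later combines can then
+    // simplify the residual i8 'or').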
+    SDValue N1 = Value.getOperand(1);
+    unsigned BitWidth = N1.getValueSizeInBits();
+    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+    if (Opc == ISD::AND)
+      Imm ^= APInt::getAllOnesValue(BitWidth);
+    if (Imm == 0 || Imm.isAllOnesValue())
+      return SDValue();
+    unsigned ShAmt = Imm.countTrailingZeros();
+    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    while (NewBW < BitWidth &&
+           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+             TLI.isNarrowingProfitable(VT, NewVT))) {
+      NewBW = NextPowerOf2(NewBW);
+      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    }
+    if (NewBW >= BitWidth)
+      return SDValue();
+
+    // If the lowest changed bit does not fall on a boundary of the new type's
+    // bitwidth, back up to the previous boundary.
+    if (ShAmt % NewBW)
+      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+    if ((Imm & Mask) == Imm) {
+      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+      if (Opc == ISD::AND)
+        NewImm ^= APInt::getAllOnesValue(NewBW);
+      uint64_t PtrOff = ShAmt / 8;
+      // For big endian targets, we need to adjust the offset to the pointer to
+      // load the correct bytes.
+      if (TLI.isBigEndian())
+        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+      const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+        return SDValue();
+
+      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+                                   Ptr.getValueType(), Ptr,
+                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
+      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+                                  LD->getChain(), NewPtr,
+                                  LD->getSrcValue(), LD->getSrcValueOffset(),
+                                  LD->isVolatile(), LD->isNonTemporal(),
+                                  NewAlign);
+      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+                                   DAG.getConstant(NewImm, NewVT));
+      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+                                   NewVal, NewPtr,
+                                   ST->getSrcValue(), ST->getSrcValueOffset(),
+                                   false, false, NewAlign);
+
+      AddToWorkList(NewPtr.getNode());
+      AddToWorkList(NewLD.getNode());
+      AddToWorkList(NewVal.getNode());
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+                                    &DeadNodes);
+      ++OpsNarrowed;
+      return NewST;
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr   = ST->getBasePtr();
+
+  // If this is a store of a bit convert, store the input value if the
+  // resultant store does not need a higher alignment than the original.
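+  // For example, "store (i32 (bit_convert f32 X)), p" can become
+  // "store f32 X, p" when the f32 store needs no stricter alignment and is
+  // legal (or we have not yet legalized operations).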
+ if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + EVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDValue Tmp; + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP type"); + case MVT::f80: // We don't do this for these yet. + case MVT::f128: + case MVT::ppcf128: + break; + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), MVT::i32); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); + } + break; + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), MVT::i64); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); + } else if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), + isVolatile, isNonTemporal, + ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SVOffset += 4; + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + Ptr, ST->getSrcValue(), + SVOffset, isVolatile, isNonTemporal, + Alignment); + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + St0, St1); + } + + break; + } + } + } + + // Try to infer better alignment information than the store already has. 
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), Align); + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet(Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits())); + AddToWorkList(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. We can do this even if this is already a truncstore. 
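+  // For example, "store (fp_round f64 X to f32), p" becomes a truncating
+  // store of the f64 value with an f32 memory type, and
+  // "store (truncate i64 X to i32), p" becomes "truncstore X, p, i32",
+  // provided the target supports the truncating store.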
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + + // If the inserted element is an UNDEF, just use the input vector. + if (InVal.getOpcode() == ISD::UNDEF) + return InVec; + + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new + // vector with the inserted element. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa<ConstantSDNode>(EltNo)) { + EVT VT = InVec.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); + + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. + SDValue InOp = InVec.getOperand(0); + EVT NVT = N->getValueType(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } + return InOp; + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. 
+  if (!LegalOperations) return SDValue();
+
+  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+  SDValue EltNo = N->getOperand(1);
+
+  if (isa<ConstantSDNode>(EltNo)) {
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    bool NewLoad = false;
+    bool BCNumEltsChanged = false;
+    EVT VT = InVec.getValueType();
+    EVT ExtVT = VT.getVectorElementType();
+    EVT LVT = ExtVT;
+
+    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+      EVT BCVT = InVec.getOperand(0).getValueType();
+      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+        return SDValue();
+      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+        BCNumEltsChanged = true;
+      InVec = InVec.getOperand(0);
+      ExtVT = BCVT.getVectorElementType();
+      NewLoad = true;
+    }
+
+    LoadSDNode *LN0 = NULL;
+    const ShuffleVectorSDNode *SVN = NULL;
+    if (ISD::isNormalLoad(InVec.getNode())) {
+      LN0 = cast<LoadSDNode>(InVec);
+    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+               InVec.getOperand(0).getValueType() == ExtVT &&
+               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+      // =>
+      // (load $addr+1*size)
+
+      // If the bit convert changed the number of elements, it is unsafe
+      // to examine the mask.
+      if (BCNumEltsChanged)
+        return SDValue();
+
+      // Select the input vector, guarding against an out-of-range extract
+      // element index.
+      unsigned NumElems = VT.getVectorNumElements();
+      int Idx = (Elt >= NumElems) ? -1 : SVN->getMaskElt(Elt);
+      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+      if (InVec.getOpcode() == ISD::BIT_CONVERT)
+        InVec = InVec.getOperand(0);
+      if (ISD::isNormalLoad(InVec.getNode())) {
+        LN0 = cast<LoadSDNode>(InVec);
+        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+      }
+    }
+
+    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+      return SDValue();
+
+    unsigned Align = LN0->getAlignment();
+    if (NewLoad) {
+      // Check the resultant load doesn't need a higher alignment than the
+      // original load.
+      unsigned NewAlign =
+        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+        return SDValue();
+
+      Align = NewAlign;
+    }
+
+    SDValue NewPtr = LN0->getBasePtr();
+    if (Elt) {
+      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+      EVT PtrType = NewPtr.getValueType();
+      if (TLI.isBigEndian())
+        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+                           DAG.getConstant(PtrOff, PtrType));
+    }
+
+    return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                       LN0->isVolatile(), LN0->isNonTemporal(), Align);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  EVT VT = N->getValueType(0);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+  // at most two distinct vectors, turn this into a shuffle node.
+  SDValue VecIn1, VecIn2;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    // Ignore undef inputs.
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    // If this input is something other than an EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // If the input vector type disagrees with the result of the build_vector,
+    // we can't make a shuffle.
+    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec.getValueType() != VT) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // Otherwise, remember this.  We allow up to two distinct input vectors.
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+
+    if (VecIn1.getNode() == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.getNode() == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+  }
+
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.getNode()) {
+    SmallVector<int, 8> Mask;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // If extracting from the first vector, just use the index directly.
+      SDValue Extract = N->getOperand(i);
+      SDValue ExtVal = Extract.getOperand(1);
+      if (Extract.getOperand(0) == VecIn1) {
+        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+        if (ExtIndex >= VT.getVectorNumElements())
+          return SDValue();
+
+        Mask.push_back(ExtIndex);
+        continue;
+      }
+
+      // Otherwise, use InIdx + VecSize
+      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+      Mask.push_back(Idx+NumInScalars);
+    }
+
+    // Bail out if the resulting vector type is not legal.
+    if (!isTypeLegal(VT))
+      return SDValue();
+
+    // Return the new VECTOR_SHUFFLE node.
+    SDValue Ops[2];
+    Ops[0] = VecIn1;
+    Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
+  // inputs come from at most two distinct vectors, turn this into a shuffle
+  // node.
+
+  // If we only have one input vector, we don't need to do any concatenation.
+  if (N->getNumOperands() == 1)
+    return N->getOperand(0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+
+  assert(N0.getValueType().getVectorNumElements() == NumElts &&
+         "Vector shuffle must be normalized in DAG");
+
+  // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+  // If it is a splat, check if the argument vector is a build_vector with
+  // all scalar elements the same.
+  if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+    SDNode *V = N0.getNode();
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+    if (V->getOpcode() == ISD::BIT_CONVERT) {
+      SDValue ConvInput = V->getOperand(0);
+      if (ConvInput.getValueType().isVector() &&
+          ConvInput.getValueType().getVectorNumElements() == NumElts)
+        V = ConvInput.getNode();
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElems = V->getNumOperands();
+      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+      if (NumElems > BaseIdx) {
+        SDValue Base;
+        bool AllSame = true;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+            Base = V->getOperand(i);
+            break;
+          }
+        }
+        // Splat of <u, u, u, u>, return <u, u, u, u>
+        if (!Base.getNode())
+          return N0;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i) != Base) {
+            AllSame = false;
+            break;
+          }
+        }
+        // Splat of <x, x, x, x>, return <x, x, x, x>
+        if (AllSame)
+          return N0;
+      }
+    }
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+  if (!TLI.getShouldFoldAtomicFences())
+    return SDValue();
+
+  SDValue atomic = N->getOperand(0);
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      break;
+    default:
+      return SDValue();
+  }
+
+  SDValue fence = atomic.getOperand(0);
+  if (fence.getOpcode() != ISD::MEMBARRIER)
+    return SDValue();
+
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+                                            fence.getOperand(0),
+                                    atomic.getOperand(1), atomic.getOperand(2),
+                                    atomic.getOperand(3)), atomic.getResNo());
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+                                            fence.getOperand(0),
+                                    atomic.getOperand(1), atomic.getOperand(2)),
+                     atomic.getResNo());
+    default:
+      return SDValue();
+  }
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+/// transform an AND to a vector_shuffle with the destination vector and a
+/// zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<int, 8> Indices;
+      unsigned NumElts = RHS.getNumOperands();
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDValue Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDValue();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          Indices.push_back(i);
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          Indices.push_back(NumElts);
+        else
+          return SDValue();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      EVT RVT = RHS.getValueType();
+      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+        return SDValue();
+
+      // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + RVT, &ZeroOps[0], ZeroOps.size()); + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); + } + } + + return SDValue(); +} + +/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { + // After legalize, the target may be depending on adds and other + // binary ops to provide legal ways to construct constants or other + // things. Simplifying them may result in a loss of legality. + if (LegalOperations) return SDValue(); + + EVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); + + EVT EltType = VT.getVectorElementType(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Shuffle = XformToShuffleWithZero(N); + if (Shuffle.getNode()) return Shuffle; + + // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold + // this operation. + if (LHS.getOpcode() == ISD::BUILD_VECTOR && + RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + // If these two elements can't be folded, bail out. + if ((LHSOp.getOpcode() != ISD::UNDEF && + LHSOp.getOpcode() != ISD::Constant && + LHSOp.getOpcode() != ISD::ConstantFP) || + (RHSOp.getOpcode() != ISD::UNDEF && + RHSOp.getOpcode() != ISD::Constant && + RHSOp.getOpcode() != ISD::ConstantFP)) + break; + + // Can't fold divide by zero. + if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || + N->getOpcode() == ISD::FDIV) { + if ((RHSOp.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || + (RHSOp.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) + break; + } + + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (LHSOp.getValueType() != EltType) + LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp); + if (RHSOp.getValueType() != EltType) + RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp); + + SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType, + LHSOp, RHSOp); + if (FoldOp.getOpcode() != ISD::UNDEF && + FoldOp.getOpcode() != ISD::Constant && + FoldOp.getOpcode() != ISD::ConstantFP) + break; + Ops.push_back(FoldOp); + AddToWorkList(FoldOp.getNode()); + } + + if (Ops.size() == LHS.getNumOperands()) { + EVT VT = LHS.getValueType(); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. 
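+  // For example, a "(select_cc lt, a, b, x, y)" result is rebuilt as
+  // "(select (setcc lt, a, b), x, y)", matching the SELECT we were handed.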
+  if (SCC.getNode()) {
+    // Check to see if we got a select_cc back (to turn into setcc/select).
+    // Otherwise, just return whatever node we got back, like fabs.
+    if (SCC.getOpcode() == ISD::SELECT_CC) {
+      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+                                  N0.getValueType(),
+                                  SCC.getOperand(0), SCC.getOperand(1),
+                                  SCC.getOperand(4));
+      AddToWorkList(SETCC.getNode());
+      // Note that ISD::SELECT takes its condition as the first operand.
+      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+                         SETCC, SCC.getOperand(2), SCC.getOperand(3));
+    }
+
+    return SCC;
+  }
+  return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select.  Callers of this should assume that TheSelect is deleted if this
+/// returns true.  As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+                                    SDValue RHS) {
+
+  // If this is a select from two identical things, try to pull the operation
+  // through the select.
+  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()) {
+    // If this is a load and the token chain is identical, replace the select
+    // of two loads with a load through a select of the address to load from.
+    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+    // constants have been dropped into the constant pool.
+    if (LHS.getOpcode() == ISD::LOAD &&
+        // Do not let this transformation reduce the number of volatile loads.
+        !cast<LoadSDNode>(LHS)->isVolatile() &&
+        !cast<LoadSDNode>(RHS)->isVolatile() &&
+        // Token chains must be identical.
+        LHS.getOperand(0) == RHS.getOperand(0)) {
+      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+      // If this is an EXTLOAD, the VT's must match.
+      if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+        // FIXME: this discards src value information.  This is
+        // over-conservative. It would be beneficial to be able to remember
+        // both potential memory locations.  Since we are discarding
+        // src value info, don't do the transformation if the memory
+        // locations are not in the default address space.
+        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+            LLDAddrSpace = PT->getAddressSpace();
+        }
+        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+            RLDAddrSpace = PT->getAddressSpace();
+        }
+        SDValue Addr;
+        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+          if (TheSelect->getOpcode() == ISD::SELECT) {
+            // Check that the condition doesn't reach either load.  If so,
+            // folding this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0), LLD->getBasePtr(),
+                                 RLD->getBasePtr());
+            }
+          } else {
+            // Check that the condition doesn't reach either load.  If so,
+            // folding this will induce a cycle into the DAG.
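+            // For example, if the condition is computed from the chain
+            // result of one of the loads, the folded load would be both a
+            // predecessor and a successor of the select, which is a cycle.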
+ if ((!LLD->hasAnyUseOfValue(1) || + (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && + (!RLD->hasAnyUseOfValue(1) || + (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + } + } + + if (Addr.getNode()) { + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + LLD->getChain(), + Addr, 0, 0, + LLD->isVolatile(), + LLD->isNonTemporal(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType(), + TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + LLD->getChain(), Addr, 0, 0, + LLD->getMemoryVT(), + LLD->isVolatile(), + LLD->isNonTemporal(), + LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + } + } + } + + return false; +} + +/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. + if (N2 == N3) return N2; + + EVT VT = N2.getValueType(); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC, DL, false); + if (SCC.getNode()) AddToWorkList(SCC.getNode()); + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); + + // fold select_cc true, x, y -> x + if (SCCC && !SCCC->isNullValue()) + return N2; + // fold select_cc false, x, y -> y + if (SCCC && SCCC->isNullValue()) + return N3; + + // Check to see if we can simplify the select into an fabs node + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { + // Allow either -0.0 or 0.0 + if (CFP->getValueAPF().isZero()) { + // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs + if ((CC == ISD::SETGE || CC == ISD::SETGT) && + N0 == N2 && N3.getOpcode() == ISD::FNEG && + N2 == N3.getOperand(0)) + return DAG.getNode(ISD::FABS, DL, VT, N0); + + // select (setl[te] X, +/-0.0), fneg(X), X -> fabs + if ((CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::FNEG && + N2.getOperand(0) == N3) + return DAG.getNode(ISD::FABS, DL, VT, N3); + } + } + + // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" + // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 + // in it. This is a win when the constant is not otherwise available because + // it replaces two constant pool loads with one. 
We only do this if the FP
+  // type is known to be legal, because if it isn't, then we are before
+  // legalize types and we want the other legalization to happen first (e.g.
+  // to avoid messing with soft float) and if the ConstantFP is not legal,
+  // because if it is legal, we may not need to store the FP constant in a
+  // constant pool.
+  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+      if (TLI.isTypeLegal(N2.getValueType()) &&
+          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+           TargetLowering::Legal) &&
+          // If both constants have multiple uses, then we won't need to do an
+          // extra load, they are likely around in registers for other users.
+          (TV->hasOneUse() || FV->hasOneUse())) {
+        Constant *Elts[] = {
+          const_cast<ConstantFP*>(FV->getConstantFPValue()),
+          const_cast<ConstantFP*>(TV->getConstantFPValue())
+        };
+        const Type *FPTy = Elts[0]->getType();
+        const TargetData &TD = *TLI.getTargetData();
+
+        // Create a ConstantArray of the two constants.
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+                                            TD.getPrefTypeAlignment(FPTy));
+        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+        // Get the offsets to the 0 and 1 element of the array so that we can
+        // select between them.
+        SDValue Zero = DAG.getIntPtrConstant(0);
+        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+        SDValue One = DAG.getIntPtrConstant(EltSize);
+
+        SDValue Cond = DAG.getSetCC(DL,
+                                    TLI.getSetCCResultType(N0.getValueType()),
+                                    N0, N1, CC);
+        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+                                        Cond, One, Zero);
+        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+                            CstOffset);
+        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+                           PseudoSourceValue::getConstantPool(), 0, false,
+                           false, Alignment);
+
+      }
+    }
+
+  // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
+  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+      N0.getValueType().isInteger() &&
+      N2.getValueType().isInteger() &&
+      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
+       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
+    EVT XType = N0.getValueType();
+    EVT AType = N2.getValueType();
+    if (XType.bitsGE(AType)) {
+      // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
+      // single-bit constant.
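+      // For example, for i32 "(select_cc setlt X, 0, 8, 0)" the single-bit
+      // constant 8 yields ShCt = 32-3-1 = 28, giving "(and (srl X, 28), 8)",
+      // which is 8 exactly when X is negative and 0 otherwise.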
+      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+        unsigned ShCtV = N2C->getAPIntValue().logBase2();
+        ShCtV = XType.getSizeInBits()-ShCtV-1;
+        SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+                                    XType, N0, ShCt);
+        AddToWorkList(Shift.getNode());
+
+        if (XType.bitsGT(AType)) {
+          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+          AddToWorkList(Shift.getNode());
+        }
+
+        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+      }
+
+      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+                                  XType, N0,
+                                  DAG.getConstant(XType.getSizeInBits()-1,
+                                                  getShiftAmountTy()));
+      AddToWorkList(Shift.getNode());
+
+      if (XType.bitsGT(AType)) {
+        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+        AddToWorkList(Shift.getNode());
+      }
+
+      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+    }
+  }
+
+  // fold select C, 16, 0 -> shl C, 4
+  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+      TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+    // If the caller doesn't want us to simplify this into a zext of a compare,
+    // don't do it.
+    if (NotExtCompare && N2C->getAPIntValue() == 1)
+      return SDValue();
+
+    // Get a SetCC of the condition
+    // FIXME: Should probably make sure that setcc is legal if we ever have a
+    // target where it isn't.
+    SDValue Temp, SCC;
+    // cast from setcc result type to select result type
+    if (LegalTypes) {
+      SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+                          N0, N1, CC);
+      if (N2.getValueType().bitsLT(SCC.getValueType()))
+        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                           N2.getValueType(), SCC);
+    } else {
+      SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                         N2.getValueType(), SCC);
+    }
+
+    AddToWorkList(SCC.getNode());
+    AddToWorkList(Temp.getNode());
+
+    if (N2C->getAPIntValue() == 1)
+      return Temp;
+
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy()));
+  }
+
+  // Check to see if this is the equivalent of setcc.
+  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
+  // ahead with the folds.
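+  // NB: the block below appears to be deliberately disabled (its
+  // "if (0 && ...)" guard can never be true) and is kept in source form
+  // pending the FIXME above.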
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + EVT XType = N0.getValueType(); + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + if (Res.getValueType() != VT) + Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); + return Res; + } + + // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) + if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + (!LegalOperations || + TLI.isOperationLegal(ISD::CTLZ, XType))) { + SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + return DAG.getNode(ISD::SRL, DL, XType, Ctlz, + DAG.getConstant(Log2_32(XType.getSizeInBits()), + getShiftAmountTy())); + } + // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) + if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { + SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + XType, DAG.getConstant(0, XType), N0); + SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + return DAG.getNode(ISD::SRL, DL, XType, + DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + } + // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { + SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + } + } + + // Check to see if this is an integer abs. + // select_cc setg[te] X, 0, X, -X -> + // select_cc setgt X, -1, X, -X -> + // select_cc setl[te] X, 0, -X, X -> + // select_cc setlt X, 1, -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C) { + ConstantSDNode *SubC = NULL; + if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (N1C->isAllOnesValue() && CC == ISD::SETGT)) && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); + else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || + (N1C->isOne() && CC == ISD::SETLT)) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); + + EVT XType = N0.getValueType(); + if (SubC && SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + } + + return SDValue(); +} + +/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, + SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans) { + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); + return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); +} + +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. 
See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself.  Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+                           const GlobalValue *&GV, void *&CV) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's adding a simple constant, integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getZExtValue();
+    }
+  }
+
+  // Return the underlying GlobalValue, and update the Offset.  Return false
+  // for GlobalAddressSDNode since the same GlobalAddress may be represented
+  // by multiple nodes with different offsets.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+    GV = G->getGlobal();
+    Offset += G->getOffset();
+    return false;
+  }
+
+  // Return the underlying Constant value, and update the Offset.  Return false
+  // for ConstantSDNodes since the same constant pool entry may be represented
+  // by multiple nodes with different offsets.
+  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+                                         : (void *)C->getConstVal();
+    Offset += C->getOffset();
+    return false;
+  }
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          unsigned SrcValueAlign1,
+                          SDValue Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2,
+                          unsigned SrcValueAlign2) const {
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+
+  // Gather base node and offset information.
+  SDValue Base1, Base2;
+  int64_t Offset1, Offset2;
+  const GlobalValue *GV1, *GV2;
+  void *CV1, *CV2;
+  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address then check to see if they overlap.
+  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+  // If we know what the bases are, and they aren't identical, then we know they
+  // cannot alias.
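+  // For example, a frame index can never alias a global or a constant-pool
+  // entry, and two distinct frame indices can never alias each other.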
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + return false; + + // If we know required SrcValue1 and SrcValue2 have relatively large alignment + // compared to the size and offset of the access, we may be able to prove they + // do not alias. This check is conservative for now to catch cases created by + // splitting vector types. + if ((SrcValueAlign1 == SrcValueAlign2) && + (SrcValueOffset1 != SrcValueOffset2) && + (Size1 == Size2) && (SrcValueAlign1 > Size1)) { + int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; + int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + + // There is no overlap between these relatively aligned accesses of similar + // size, return no alias. + if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + return false; + } + + if (CombinerGlobalAA) { + // Use alias analysis information. + int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); + int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; + int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + AliasAnalysis::AliasResult AAResult = + AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); + if (AAResult == AliasAnalysis::NoAlias) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// FindAliasInfo - Extracts the relevant alias information from the memory +/// node. Returns true if the operand was a load. +bool DAGCombiner::FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign) const { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + Size = LD->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LD->getSrcValue(); + SrcValueOffset = LD->getSrcValueOffset(); + SrcValueAlign = LD->getOriginalAlignment(); + return true; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + Size = ST->getMemoryVT().getSizeInBits() >> 3; + SrcValue = ST->getSrcValue(); + SrcValueOffset = ST->getSrcValueOffset(); + SrcValueAlign = ST->getOriginalAlignment(); + } else { + llvm_unreachable("FindAliasInfo expected a memory operand"); + } + + return false; +} + +/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases) { + SmallVector<SDValue, 8> Chains; // List of chains to visit. + SmallPtrSet<SDNode *, 16> Visited; // Visited node set. + + // Get alias information for node. + SDValue Ptr; + int64_t Size; + const Value *SrcValue; + int SrcValueOffset; + unsigned SrcValueAlign; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign); + + // Starting off. + Chains.push_back(OriginalChain); + unsigned Depth = 0; + + // Look at each chain and determine if it is an alias. If so, add it to the + // aliases list. If not, then continue up the chain looking for the next + // candidate. + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. If we've seen two aliases, assume we'll + // find more and revert to original chain since the xform is unlikely to be + // profitable. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. 
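+    // Both limits below are heuristic cut-offs: a deep walk or more than
+    // two aliasing nodes suggests the transformation is unlikely to be
+    // profitable, so give up and keep the original chain.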
+ if (Depth > 6 || Aliases.size() == 2) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + break; + } + + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode())) + continue; + + switch (Chain.getOpcode()) { + case ISD::EntryToken: + // Entry token is ideal chain operand, but handled in FindBetterChain. + break; + + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for Chain. + SDValue OpPtr; + int64_t OpSize; + const Value *OpSrcValue; + int OpSrcValueOffset; + unsigned OpSrcValueAlign; + bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, + OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign); + + // If chain is alias then stop here. + if (!(IsLoad && IsOpLoad) && + isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign)) { + Aliases.push_back(Chain); + } else { + // Look further up the chain. + Chains.push_back(Chain.getOperand(0)); + ++Depth; + } + break; + } + + case ISD::TokenFactor: + // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } + for (unsigned n = Chain.getNumOperands(); n;) + Chains.push_back(Chain.getOperand(--n)); + ++Depth; + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } +} + +/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking +/// for a better chain (aliasing node.) +SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { + SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + if (Aliases.size() == 0) { + // If no operands then chain to entry token. + return DAG.getEntryNode(); + } else if (Aliases.size() == 1) { + // If a single operand then chain to it. We don't need to revisit it. + return Aliases[0]; + } + + // Construct a custom tailored token factor. + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); +} + +// SelectionDAG::Combine - This is the entry point for the file. +// +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 0000000..a4eed71 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,1241 @@ +//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. 
It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time.  For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support.  In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated.  Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time.  Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators.  More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+  LocalValueMap.clear();
+
+  // Start out as null, meaning no local-value instructions have
+  // been emitted.
+  LastLocalValue = 0;
+
+  // Advance the last local value past any EH_LABEL instructions.
+  MachineBasicBlock::iterator
+    I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+  while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+    LastLocalValue = I;
+    ++I;
+  }
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+  // Don't consider constants or arguments to have trivial kills.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+
+  // No-op casts are trivially coalesced by fast-isel.
+  if (const CastInst *Cast = dyn_cast<CastInst>(I))
+    if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+        !hasTrivialKill(Cast->getOperand(0)))
+      return false;
+
+  // Only instructions with a single use in the same basic block are considered
+  // to have trivial kills.
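+  // Pointer casts are excluded below because they are frequently selected
+  // as no-ops that reuse the source register, so a single use of the cast
+  // does not guarantee that the underlying value dies here.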
+ return I->hasOneUse() && + !(I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr) && + cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); +} + +unsigned FastISel::getRegForValue(const Value *V) { + EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + // Don't handle non-simple values in FastISel. + if (!RealVT.isSimple()) + return 0; + + // Ignore illegal types. We must do this before looking up the value + // in ValueMap because Arguments are given virtual registers regardless + // of whether FastISel can handle them. + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT)) { + // Promote MVT::i1 to a legal type though, because it's common and easy. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); + else + return 0; + } + + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominates-use requirement enforced. + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) { + unsigned Reg = I->second; + return Reg; + } + unsigned Reg = LocalValueMap[V]; + if (Reg != 0) + return Reg; + + // In bottom-up mode, just create the virtual register which will be used + // to hold the value. It will be materialized later. + if (isa<Instruction>(V) && + (!isa<AllocaInst>(V) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) + return FuncInfo.InitializeRegForValue(V); + + SavePoint SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); + + leaveLocalValueArea(SaveInsertPt); + + return Reg; +} + +/// materializeRegForValue - Helper for getRegForValue. This function is +/// called when the value isn't already available in a register and must +/// be materialized with new instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().getActiveBits() <= 64) + Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) { + Reg = TargetMaterializeAlloca(cast<AllocaInst>(V)); + } else if (isa<ConstantPointerNull>(V)) { + // Translate this as an integer zero so that it can be + // local-CSE'd with actual integer zeros. + Reg = + getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + // Try to emit the constant directly. + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + + if (!Reg) { + // Try to emit the constant by using an integer constant with a cast. 
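+      // For example, 4.0 converts exactly to the integer 4, so it can be
+      // rebuilt as (sint_to_fp (Constant 4)); inexact values are rejected.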
+      const APFloat &Flt = CF->getValueAPF();
+      EVT IntVT = TLI.getPointerTy();
+
+      uint64_t x[2];
+      uint32_t IntBitWidth = IntVT.getSizeInBits();
+      bool isExact;
+      (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+                                  APFloat::rmTowardZero, &isExact);
+      if (isExact) {
+        APInt IntVal(IntBitWidth, 2, x);
+
+        unsigned IntegerReg =
+          getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+        if (IntegerReg != 0)
+          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+                           IntegerReg, /*Kill=*/false);
+      }
+    }
+  } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+    if (!SelectOperator(Op, Op->getOpcode()))
+      if (!isa<Instruction>(Op) ||
+          !TargetSelectInstruction(cast<Instruction>(Op)))
+        return 0;
+    Reg = lookUpRegForValue(Op);
+  } else if (isa<UndefValue>(V)) {
+    Reg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+  }
+
+  // If target-independent code couldn't handle the value, give target-specific
+  // code a try.
+  if (!Reg && isa<Constant>(V))
+    Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+  // Don't cache constant materializations in the general ValueMap.
+  // To do so would require tracking what uses they dominate.
+  if (Reg != 0) {
+    LocalValueMap[V] = Reg;
+    LastLocalValue = MRI.getVRegDef(Reg);
+  }
+  return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+  // Look up the value to see if we already have a register for it. We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+  if (I != FuncInfo.ValueMap.end())
+    return I->second;
+  return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
+  if (!isa<Instruction>(I)) {
+    LocalValueMap[I] = Reg;
+    return Reg;
+  }
+
+  unsigned &AssignedReg = FuncInfo.ValueMap[I];
+  if (AssignedReg == 0)
+    // Use the new register.
+    AssignedReg = Reg;
+  else if (Reg != AssignedReg) {
+    // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+    FuncInfo.RegFixups[AssignedReg] = Reg;
+
+    AssignedReg = Reg;
+  }
+
+  return AssignedReg;
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+  unsigned IdxN = getRegForValue(Idx);
+  if (IdxN == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return std::pair<unsigned, bool>(0, false);
+
+  bool IdxNIsKill = hasTrivialKill(Idx);
+
+  // If the index is smaller or larger than intptr_t, truncate or extend it.
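+  // GEP indices are signed values, so a narrower index (e.g. an i32 index
+  // on a 64-bit target) is sign-extended rather than zero-extended.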
+ MVT PtrVT = TLI.getPointerTy(); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + else if (IdxVT.bitsGT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); +} + +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + FuncInfo.MBB = FuncInfo.InsertPt->getParent(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +FastISel::SavePoint FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + DebugLoc OldDL = DL; + recomputeInsertPt(); + DL = DebugLoc(); + SavePoint SP = { OldInsertPt, OldDL }; + return SP; +} + +void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = llvm::prior(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt.InsertPt; + DL = OldInsertPt.DL; +} + +/// SelectBinaryOp - Select and emit code for a binary operator instruction, +/// which has an opcode which directly corresponds to the given ISD opcode. +/// +bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. + if (!TLI.isTypeLegal(VT)) { + // MVT::i1 is special. Allow AND, OR, or XOR because they + // don't require additional zeroing, which makes them easy. + if (VT == MVT::i1 && + (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) + VT = TLI.getTypeToTransformTo(I->getContext(), VT); + else + return false; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // Check if the second operand is a constant and handle it appropriately. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, + CI->getZExtValue()); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + // Check if the second operand is a constant float. + if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, CF); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) + // Unhandled operand. Halt "fast" selection and bail. 
+ return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, + Op0, Op0IsKill, + Op1, Op1IsKill); + if (ResultReg == 0) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectGetElementPtr(const User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool NIsKill = hasTrivialKill(I->getOperand(0)); + + const Type *Ty = I->getOperand(0)->getType(); + MVT VT = TLI.getPointerTy(); + for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, + E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); + // FIXME: This can be optimized by combining the add with a + // subsequent one. + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) continue; + uint64_t Offs = + TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD.getTypeAllocSize(Ty); + std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + IdxNIsKill = true; + } + N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, N); + return true; +} + +bool FastISel::SelectCall(const User *I) { + const Function *F = cast<CallInst>(I)->getCalledFunction(); + if (!F) return false; + + // Handle selected intrinsic function calls. + unsigned IID = F->getIntrinsicID(); + switch (IID) { + default: break; + case Intrinsic::dbg_declare: { + const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + if (!DIVariable(DI->getVariable()).Verify() || + !FuncInfo.MF->getMMI().hasDebugInfo()) + return true; + + const Value *Address = DI->getAddress(); + if (!Address) + return true; + if (isa<UndefValue>(Address)) + return true; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) + // Building the map above is target independent. 
Generating DBG_VALUE + // inline is target dependent; do this now. + (void)TargetSelectInstruction(cast<Instruction>(I)); + return true; + } + case Intrinsic::dbg_value: { + // This form of DBG_VALUE is target-independent. + const DbgValueInst *DI = cast<DbgValueInst>(I); + const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const Value *V = DI->getValue(); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else if (unsigned Reg = lookUpRegForValue(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + // Insert an undef so we can see what we dropped. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } + return true; + } + case Intrinsic::eh_exception: { + EVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { + default: break; + case TargetLowering::Expand: { + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + UpdateValueMap(I, ResultReg); + return true; + } + } + break; + } + case Intrinsic::eh_selector: { + EVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { + default: break; + case TargetLowering::Expand: { + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); + else { +#ifndef NDEBUG + FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); + } + + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + + bool ResultRegIsKill = hasTrivialKill(I); + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg, ResultRegIsKill); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. 
+ return false; + + UpdateValueMap(I, ResultReg); + + return true; + } + } + break; + } + } + + // An arbitrary call. Bail. + return false; +} + +bool FastISel::SelectCast(const User *I, unsigned Opcode) { + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. Or as a special case, + // it may be i1 if we're doing a truncate because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(DstVT)) + if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the source operand is legal. Or as a special case, + // it may be i1 if we're doing zero-extension because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(SrcVT)) + if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + + // If the operand is i1, arrange for the high bits in the register to be zero. + if (SrcVT == MVT::i1) { + SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); + if (!InputReg) + return false; + InputRegIsKill = true; + } + // If the result is i1, truncate to the target's type for i1 first. + if (DstVT == MVT::i1) + DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); + + unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), + DstVT.getSimpleVT(), + Opcode, + InputReg, InputRegIsKill); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectBitCast(const User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + UpdateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple() || + !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // First, try to perform the bitcast by inserting a reg-reg copy. + unsigned ResultReg = 0; + if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { + TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0); + } + } + + // If the reg-reg copy failed, select a BIT_CONVERT opcode. 
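+  // A cross-class bitcast (e.g. f64 to i64 on a target that keeps them in
+  // separate register files) needs a real instruction rather than a copy.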
+ if (!ResultReg) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + ISD::BIT_CONVERT, Op0, Op0IsKill); + + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectInstruction(const Instruction *I) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(I)) + if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + return false; + + DL = I->getDebugLoc(); + + // First, try doing target-independent selection. + if (SelectOperator(I, I->getOpcode())) { + DL = DebugLoc(); + return true; + } + + // Next, try calling the target to attempt to handle the instruction. + if (TargetSelectInstruction(I)) { + DL = DebugLoc(); + return true; + } + + DL = DebugLoc(); + return false; +} + +/// FastEmitBranch - Emit an unconditional branch to the given block, +/// unless it is the immediate (fall-through) successor, and update +/// the CFG. +void +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { + if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // The unconditional fall-through case, which needs no instructions. + } else { + // The unconditional branch case. + TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + SmallVector<MachineOperand, 0>(), DL); + } + FuncInfo.MBB->addSuccessor(MSucc); +} + +/// SelectFNeg - Emit an FNeg operation. +/// +bool +FastISel::SelectFNeg(const User *I) { + unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + if (OpReg == 0) return false; + + bool OpRegIsKill = hasTrivialKill(I); + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(I->getType()); + unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), + ISD::FNEG, OpReg, OpRegIsKill); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. + if (VT.getSizeInBits() > 64) return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BIT_CONVERT, OpReg, OpRegIsKill); + if (IntReg == 0) + return false; + + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, + IntReg, /*Kill=*/true, + UINT64_C(1) << (VT.getSizeInBits()-1), + IntVT.getSimpleVT()); + if (IntResultReg == 0) + return false; + + ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), + ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); + if (ResultReg == 0) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectOperator(const User *I, unsigned Opcode) { + switch (Opcode) { + case Instruction::Add: + return SelectBinaryOp(I, ISD::ADD); + case Instruction::FAdd: + return SelectBinaryOp(I, ISD::FADD); + case Instruction::Sub: + return SelectBinaryOp(I, ISD::SUB); + case Instruction::FSub: + // FNeg is currently represented in LLVM IR as a special case of FSub. 
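+    // i.e. (fsub -0.0, X); isFNeg recognizes exactly that pattern.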
+    if (BinaryOperator::isFNeg(I))
+      return SelectFNeg(I);
+    return SelectBinaryOp(I, ISD::FSUB);
+  case Instruction::Mul:
+    return SelectBinaryOp(I, ISD::MUL);
+  case Instruction::FMul:
+    return SelectBinaryOp(I, ISD::FMUL);
+  case Instruction::SDiv:
+    return SelectBinaryOp(I, ISD::SDIV);
+  case Instruction::UDiv:
+    return SelectBinaryOp(I, ISD::UDIV);
+  case Instruction::FDiv:
+    return SelectBinaryOp(I, ISD::FDIV);
+  case Instruction::SRem:
+    return SelectBinaryOp(I, ISD::SREM);
+  case Instruction::URem:
+    return SelectBinaryOp(I, ISD::UREM);
+  case Instruction::FRem:
+    return SelectBinaryOp(I, ISD::FREM);
+  case Instruction::Shl:
+    return SelectBinaryOp(I, ISD::SHL);
+  case Instruction::LShr:
+    return SelectBinaryOp(I, ISD::SRL);
+  case Instruction::AShr:
+    return SelectBinaryOp(I, ISD::SRA);
+  case Instruction::And:
+    return SelectBinaryOp(I, ISD::AND);
+  case Instruction::Or:
+    return SelectBinaryOp(I, ISD::OR);
+  case Instruction::Xor:
+    return SelectBinaryOp(I, ISD::XOR);
+
+  case Instruction::GetElementPtr:
+    return SelectGetElementPtr(I);
+
+  case Instruction::Br: {
+    const BranchInst *BI = cast<BranchInst>(I);
+
+    if (BI->isUnconditional()) {
+      const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+      MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+      FastEmitBranch(MSucc, BI->getDebugLoc());
+      return true;
+    }
+
+    // Conditional branches are not handled yet.
+    // Halt "fast" selection and bail.
+    return false;
+  }
+
+  case Instruction::Unreachable:
+    // Nothing to emit.
+    return true;
+
+  case Instruction::Alloca:
+    // FunctionLowering has the static-sized case covered.
+    if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+      return true;
+
+    // Dynamic-sized alloca is not handled yet.
+    return false;
+
+  case Instruction::Call:
+    return SelectCall(I);
+
+  case Instruction::BitCast:
+    return SelectBitCast(I);
+
+  case Instruction::FPToSI:
+    return SelectCast(I, ISD::FP_TO_SINT);
+  case Instruction::ZExt:
+    return SelectCast(I, ISD::ZERO_EXTEND);
+  case Instruction::SExt:
+    return SelectCast(I, ISD::SIGN_EXTEND);
+  case Instruction::Trunc:
+    return SelectCast(I, ISD::TRUNCATE);
+  case Instruction::SIToFP:
+    return SelectCast(I, ISD::SINT_TO_FP);
+
+  case Instruction::IntToPtr: // Deliberate fall-through.
+  case Instruction::PtrToInt: {
+    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+    EVT DstVT = TLI.getValueType(I->getType());
+    if (DstVT.bitsGT(SrcVT))
+      return SelectCast(I, ISD::ZERO_EXTEND);
+    if (DstVT.bitsLT(SrcVT))
+      return SelectCast(I, ISD::TRUNCATE);
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0) return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
+  case Instruction::PHI:
+    llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+  default:
+    // Unhandled instruction. Halt "fast" selection and bail.
+    return false;
+  }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+  : FuncInfo(funcInfo),
+    MRI(FuncInfo.MF->getRegInfo()),
+    MFI(*FuncInfo.MF->getFrameInfo()),
+    MCP(*FuncInfo.MF->getConstantPool()),
+    TM(FuncInfo.MF->getTarget()),
+    TD(*TM.getTargetData()),
+    TII(*TM.getInstrInfo()),
+    TLI(*TM.getTargetLowering()),
+    TRI(*TM.getRegisterInfo()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+                             unsigned) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+                              unsigned,
+                              unsigned /*Op0*/, bool /*Op0IsKill*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               unsigned /*Op1*/, bool /*Op1IsKill*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+                              unsigned, const ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               const ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+                                unsigned,
+                                unsigned /*Op0*/, bool /*Op0IsKill*/,
+                                unsigned /*Op1*/, bool /*Op1IsKill*/,
+                                uint64_t /*Imm*/) {
+  return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+                                unsigned Op0, bool Op0IsKill,
+                                uint64_t Imm, MVT ImmType) {
+  // First check if immediate type is legal. If not, we can't use the ri form.
+  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+  if (ResultReg != 0)
+    return ResultReg;
+  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+  if (MaterialReg == 0)
+    return 0;
+  return FastEmit_rr(VT, VT, Opcode,
+                     Op0, Op0IsKill,
+                     MaterialReg, /*Kill=*/true);
+}
+
+/// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
+                                unsigned Op0, bool Op0IsKill,
+                                const ConstantFP *FPImm, MVT ImmType) {
+  // First check if immediate type is legal. If not, we can't use the rf form.
+  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
+  if (ResultReg != 0)
+    return ResultReg;
+
+  // Materialize the constant in a register.
+  unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+  if (MaterialReg == 0) {
+    // If the target doesn't have a way to directly enter a floating-point
+    // value into a register, use an alternate approach.
+    // TODO: The current approach only supports floating-point constants
+    // that can be constructed by conversion from integer values. This should
+    // be replaced by code that creates a load from a constant-pool entry,
+    // which will require some target-specific work.
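+    // This fallback only succeeds for values such as 2.0 that round-trip
+    // through an integer exactly; for anything else it reports failure.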
+ const APFloat &Flt = FPImm->getValueAPF(); + EVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (!isExact) + return 0; + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::Constant, IntVal.getZExtValue()); + if (IntegerReg == 0) + return 0; + MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, + ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); + if (MaterialReg == 0) + return 0; + } + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + 
.addFPImm(FPImm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx); + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +} + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. 
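+    // Note: SmallPtrSet::insert returns false when the element was already
+    // present, i.e. when this successor has been handled.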
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::const_iterator I = SuccBB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+      // Ignore dead PHIs.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types. Two interesting things to note here. First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+      // use CreateRegs to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+        else {
+          FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      // Set the DebugLoc for the copy. Prefer the location of the operand
+      // if there is one; use the location of the PHI otherwise.
+      DL = PN->getDebugLoc();
+      if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+        DL = Inst->getDebugLoc();
+
+      unsigned Reg = getRegForValue(PHIOp);
+      if (Reg == 0) {
+        FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+      DL = DebugLoc();
+    }
+  }
+
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000..5ef6404
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,346 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "function-lowering-info" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +using namespace llvm; + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { + if (I->use_empty()) return false; + if (isa<PHINode>(I)) return true; + const BasicBlock *BB = I->getParent(); + for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) + return true; + } + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (EnableFastISel) + return A->use_empty(); + + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) + return false; // Use not in entry block. + } + return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(Fn->getReturnType(), + Fn->getAttributes().getRetAttributes(), Outs, TLI); + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + Outs, Fn->getContext()); + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. 
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + // The object may need to be placed onto the stack near the stack + // protector if one exists. Determine here if this object is a suitable + // candidate. I.e., it would trigger the creation of a stack protector. + bool MayNeedSP = + (AI->isArrayAllocation() || + (TySize > 8 && isa<ArrayType>(Ty) && + cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + } + + for (; BB != EB; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Mark values used outside their block as exported, by allocating + // a virtual register for them. + if (isUsedOutsideOfDefiningBlock(I)) + if (!isa<AllocaInst>(I) || + !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(I); + + // Collect llvm.dbg.declare information. This is done now instead of + // during the initial isel pass through the IR so that it is done + // in a predictable order. + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + MachineModuleInfo &MMI = MF->getMMI(); + if (MMI.hasDebugInfo() && + DIVariable(DI->getVariable()).Verify() && + !DI->getDebugLoc().isUnknown()) { + // Don't handle byval struct arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack + // temporary alloca at this point). + const Value *Address = DI->getAddress(); + if (Address) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + DenseMap<const AllocaInst *, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI != StaticAllocaMap.end()) { // Check for VLAs. + int FI = SI->second; + MMI.setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); + } + } + } + } + } + } + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (BB = Fn->begin(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Transfer the address-taken flag. This is necessary because there could + // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only + // the first one should be marked. + if (BB->hasAddressTaken()) + MBB->setHasAddressTaken(); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. 
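+    // A PHI whose type is promoted or expanded to several registers (e.g.
+    // an i64 PHI lowered as two i32 halves on a 32-bit target) gets one
+    // machine PHI per register.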
+ for (BasicBlock::const_iterator I = BB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (PN->use_empty()) continue; + + DebugLoc DL = PN->getDebugLoc(); + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } + + // Mark landing pad blocks. + for (BB = Fn->begin(); BB != EB; ++BB) + if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); +} + +/// clear - Clear out all the function-specific state. This returns this +/// FunctionLoweringInfo to an empty state, ready to be used for a +/// different function. +void FunctionLoweringInfo::clear() { + assert(CatchInfoFound.size() == CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); + + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); +#ifndef NDEBUG + CatchInfoLost.clear(); + CatchInfoFound.clear(); +#endif + LiveOutRegInfo.clear(); + ArgDbgValues.clear(); + ByValArgFrameIndexMap.clear(); + RegFixups.clear(); +} + +/// CreateReg - Allocate a single virtual register for the given type. +unsigned FunctionLoweringInfo::CreateReg(EVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegs - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, Ty, ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = CreateReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// setByValArgumentFrameIndex - Record frame index for the byval +/// argument. This overrides previous frame index entry for this argument, +/// if any. +void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, + int FI) { + assert (A->hasByValAttr() && "Argument does not have byval attribute!"); + ByValArgFrameIndexMap[A] = FI; +} + +/// getByValArgumentFrameIndex - Get frame index for the byval argument. +/// If the argument does not have any assigned frame index then 0 is +/// returned. 
+int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) { + assert (A->hasByValAttr() && "Argument does not have byval attribute!"); + DenseMap<const Argument *, int>::iterator I = + ByValArgFrameIndexMap.find(A); + if (I != ByValArgFrameIndexMap.end()) + return I->second; + DEBUG(dbgs() << "Argument does not have assigned frame index!"); + return 0; +} + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); + assert(CE->getOpcode() == Instruction::BitCast && + isa<Function>(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. + std::vector<const GlobalVariable *> TyInfo; + unsigned N = I.getNumArgOperands(); + + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert(FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. + TyInfo.reserve(FilterLength - 1); + for (unsigned j = i + 1; j < FirstCatch; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); + MMI->addFilterTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + N = i; + } + } + + if (N > 2) { + TyInfo.reserve(N - 2); + for (unsigned j = 2; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + } +} + +void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { + for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + // Apply the catch info to DestBB. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); +#ifndef NDEBUG + if (!FLI.MBBMap[SrcBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); +#endif + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000..61c2a90 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,896 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional flag operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Flag)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;    // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional flag operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VRBase = 0;
+  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+    // Just use the input register directly!
+    SDValue Op(Node, ResNo);
+    if (IsClone)
+      VRBaseMap.erase(Op);
+    bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+    isNew = isNew; // Silence compiler warning.
+    assert(isNew && "Node emitted out of order - early");
+    return;
+  }
+
+  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+  // the CopyToReg'd destination register instead of creating a new vreg.
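+  // Illustrative example: if this CopyFromReg's only user is a CopyToReg
+  // into virtual register %reg1024, the loop below picks %reg1024 as VRBase,
+  // so no extra vreg and no second COPY are needed.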
+ bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + EVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = 0; + if (i+II.getNumDefs() < II.getNumOperands()) + RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC); + // If multiple uses expect disjoint register classes, we emit + // copies in AddRegisterOperand. + if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + EVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. 
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. + unsigned NumResults = CountResults(Node); + VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
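+    // With no CopyToReg use to borrow a destination from, fall back to the
+    // target's default register class for this value type (e.g.,
+    // hypothetically, GR32 for an i32 IMPLICIT_DEF on x86).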
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI->createVirtualRegister(RC); + } + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. + if (II) { + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + const TargetRegisterClass *DstRC = 0; + if (IIOpNum < II->getNumOperands()) + DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } + } + + // If this value has only one use, that use is a kill. This is a + // conservative approximation. InstrEmitter does trivial coalescing + // with CopyFromReg nodes, so don't emit kill flags for them. + // Avoid kill flags on Schedule cloned nodes, since there will be + // multiple uses. + // Tied operands are never killed, so we need to check that. And that + // means we need to determine the index of the operand. + bool isKill = Op.hasOneUse() && + Op.getNode()->getOpcode() != ISD::CopyFromReg && + !IsDebug && + !(IsClone || IsCloned); + if (isKill) { + unsigned Idx = MI->getNumOperands(); + while (Idx > 0 && + MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + if (isTied) + isKill = false; + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, + false/*isImp*/, isKill, + false/*isDead*/, false/*isUndef*/, + false/*isEarlyClobber*/, + 0/*SubReg*/, IsDebug)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. 
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+                              unsigned IIOpNum,
+                              const TargetInstrDesc *II,
+                              DenseMap<SDValue, unsigned> &VRBaseMap,
+                              bool IsDebug, bool IsClone, bool IsCloned) {
+  if (Op.isMachineOpcode()) {
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
+  } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+  } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+    const ConstantFP *CFP = F->getConstantFPValue();
+    MI->addOperand(MachineOperand::CreateFPImm(CFP));
+  } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+  } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+                                            TGA->getTargetFlags()));
+  } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+  } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+  } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+                                             JT->getTargetFlags()));
+  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+    int Offset = CP->getOffset();
+    unsigned Align = CP->getAlignment();
+    const Type *Type = CP->getType();
+    // MachineConstantPool wants an explicit alignment.
+    if (Align == 0) {
+      Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+      if (Align == 0) {
+        // Alignment of vector types.  FIXME!
+        Align = TM->getTargetData()->getTypeAllocSize(Type);
+      }
+    }
+
+    unsigned Idx;
+    MachineConstantPool *MCP = MF->getConstantPool();
+    if (CP->isMachineConstantPoolEntry())
+      Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+    else
+      Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+                                             CP->getTargetFlags()));
+  } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+                                            ES->getTargetFlags()));
+  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+                                            BA->getTargetFlags()));
+  } else {
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
+  }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+                         unsigned SubIdx, EVT VT) {
+  // Pick the register class of the superregister for this type
+  for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+         E = TRC->superregclasses_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + + // Add source, and subreg index + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); + MBB->insert(InsertPos, MI); + } else if (Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. 
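+    // Schematically, the instruction built below is either
+    //   %dst = INSERT_SUBREG %src, %sub, SubIdx
+    // or
+    //   %dst = SUBREG_TO_REG imm, %sub, SubIdx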
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+    MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+    // If creating a subreg_to_reg, then the first input operand
+    // is an implicit value immediate, otherwise it's a register
+    if (Opc == TargetOpcode::SUBREG_TO_REG) {
+      const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+      MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+    } else
+      AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
+    // Add the subregister being inserted
+    AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
+    MI->addOperand(MachineOperand::CreateImm(SubIdx));
+    MBB->insert(InsertPos, MI);
+  } else
+    llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+                                     DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+  // Create the new VReg in the destination class and emit a copy.
+  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+  const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+  unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+  BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+    NewVReg).addReg(VReg);
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+                                   DenseMap<SDValue, unsigned> &VRBaseMap,
+                                   bool IsClone, bool IsCloned) {
+  const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+  unsigned NewVReg = MRI->createVirtualRegister(RC);
+  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                             TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+  unsigned NumOps = Node->getNumOperands();
+  assert((NumOps & 1) == 0 &&
+         "REG_SEQUENCE must have an even number of operands!");
+  const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Op = Node->getOperand(i);
+    if (i & 1) {
+      unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+      unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+      const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+      const TargetRegisterClass *SRC =
+        TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+      if (!SRC)
+        llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
+      if (SRC != RC) {
+        MRI->setRegClass(NewVReg, SRC);
+        RC = SRC;
+      }
+    }
+    AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
+  }
+
+  MBB->insert(InsertPos, MI);
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+/// +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap) { + uint64_t Offset = SD->getOffset(); + MDNode* MDPtr = SD->getMDPtr(); + DebugLoc DL = SD->getDebugLoc(); + + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + unsigned FrameIx = SD->getFrameIx(); + return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + } + // Otherwise, we're going to create an instruction here. + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + if (SD->getKind() == SDDbgValue::SDNODE) { + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } else if (SD->getKind() == SDDbgValue::CONST) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + // FIXME: SDDbgValue constants aren't updated with legalization, so it's + // possible to have i128 constants in them at this point. Dwarf writer + // does not handle i128 constants at the moment so, as a crude workaround, + // just drop the debug info if this happens. + if (!CI->getValue().isSignedIntN(64)) + MIB.addReg(0U); + else + MIB.addImm(CI->getSExtValue()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MIB.addFPImm(CF); + } else { + // Could be an Undef. In any case insert an Undef so we can see what we + // dropped. + MIB.addReg(0U); + } + } else { + // Insert an Undef so we can see what we dropped. + MIB.addReg(0U); + } + + MIB.addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + +/// EmitMachineNode - Generate machine code for a target-specific node and +/// needed dependencies. +/// +void InstrEmitter:: +EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + if (Opc == TargetOpcode::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. 
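+    // (getVR emits a fresh IMPLICIT_DEF immediately before each use, so
+    // there is nothing to emit here.)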
+    return;
+
+  const TargetInstrDesc &II = TII->get(Opc);
+  unsigned NumResults = CountResults(Node);
+  unsigned NodeOperands = CountOperands(Node);
+  bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+#ifndef NDEBUG
+  unsigned NumMIOperands = NodeOperands + NumResults;
+  if (II.isVariadic())
+    assert(NumMIOperands >= II.getNumOperands() &&
+           "Too few operands for a variadic node!");
+  else
+    assert(NumMIOperands >= II.getNumOperands() &&
+           NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+           "#operands for dag node doesn't match .td file!");
+#endif
+
+  // Create the new machine instruction.
+  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+  // The MachineInstr constructor adds implicit-def operands. Scan through
+  // these to determine which are dead.
+  if (MI->getNumOperands() != 0 &&
+      Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+    // First, collect all used registers.
+    SmallVector<unsigned, 8> UsedRegs;
+    for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+      if (F->getOpcode() == ISD::CopyFromReg)
+        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+      else {
+        // Collect declared implicit uses.
+        const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
+        UsedRegs.append(TID.getImplicitUses(),
+                        TID.getImplicitUses() + TID.getNumImplicitUses());
+        // In addition to declared implicit uses, we must also check for
+        // direct RegisterSDNode operands.
+        for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+          if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+            unsigned Reg = R->getReg();
+            if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+              UsedRegs.push_back(Reg);
+          }
+      }
+    // Then mark unused registers as dead.
+    MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+  }
+
+  // Add result register values for things that are defined by this
+  // instruction.
+  if (NumResults)
+    CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+  // Emit all of the actual operands of this instruction, adding them to the
+  // instruction as appropriate.
+  bool HasOptPRefs = II.getNumDefs() > NumResults;
+  assert((!HasOptPRefs || !HasPhysRegOuts) &&
+         "Unable to cope with optional defs and phys regs defs!");
+  unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+  for (unsigned i = NumSkip; i != NodeOperands; ++i)
+    AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+  // Transfer all of the memory reference descriptions of this instruction.
+  MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+                 cast<MachineSDNode>(Node)->memoperands_end());
+
+  // Insert the instruction into position in the block. This needs to
+  // happen before any custom inserter hook is called so that the
+  // hook knows where in the block to insert the replacement code.
+  MBB->insert(InsertPos, MI);
+
+  if (II.usesCustomInsertionHook()) {
+    // Insert this instruction into the basic block using a target-specific
+    // inserter which may return a new basic block.
+    bool AtEnd = InsertPos == MBB->end();
+    MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+    if (NewMBB != MBB) {
+      if (AtEnd)
+        InsertPos = NewMBB->end();
+      MBB = NewMBB;
+    }
+    return;
+  }
+
+  // Additional results must be a physical register def.
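+  // For example (illustrative): an instruction whose extra result lives in a
+  // condition-flags register.  Used implicit defs are copied out below;
+  // unused ones are marked dead.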
+ if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + // If there are no uses, mark the register as dead now, so that + // MachineLICM/Sink can see that it's dead. Don't do this if the + // node has a Flag value, for the benefit of targets still using + // Flag for values in physregs. + else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + MI->addRegisterDead(Reg, TRI); + } + } + + // If the instruction has implicit defs and the node doesn't, mark the + // implicit def as dead. If the node has any flag outputs, we don't do this + // because we don't know what implicit defs are being used by flagged nodes. + if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + if (const unsigned *IDList = II.getImplicitDefs()) { + for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); + i != e; ++i) + MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + } +} + +/// EmitSpecialNode - Generate machine code for a target-independent node and +/// needed dependencies. +void InstrEmitter:: +EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(); +#endif + llvm_unreachable("This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + llvm_unreachable("EntryToken should have been excluded from the schedule!"); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::EH_LABEL: { + MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel(); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::EH_LABEL)).addSym(S); + break; + } + + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::INLINEASM)); + + // Add the asm string as an external symbol operand. + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add the isAlignStack bit. + int64_t isAlignStack = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + + // Add all of the operand registers to the instruction. 
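+    // Operand groups arrive as a flags immediate followed by NumVals values;
+    // in this InlineAsm encoding the flags word packs the kind in the low
+    // bits and the value count above them.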
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. + MI->addOperand(MachineOperand::CreateReg(Reg, true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); + } + break; + case InlineAsm::Kind_RegDefEarlyClobber: + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), + /*isKill=*/ false, + /*isDead=*/ false, + /*isUndef=*/false, + /*isEarlyClobber=*/ true)); + } + break; + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + break; + } + } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + if (MD) + MI->addOperand(MachineOperand::CreateMetadata(MD)); + + MBB->insert(InsertPos, MI); + break; + } + } +} + +/// InstrEmitter - Construct an InstrEmitter and set it to start inserting +/// at the given position in the given block. +InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos) + : MF(mbb->getParent()), + MRI(&MF->getRegInfo()), + TM(&MF->getTarget()), + TII(TM->getInstrInfo()), + TRI(TM->getRegisterInfo()), + TLI(TM->getTargetLowering()), + MBB(mbb), InsertPos(insertpos) { +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h new file mode 100644 index 0000000..02c044c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -0,0 +1,142 @@ +//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class TargetInstrDesc;
+class SDDbgValue;
+
+class InstrEmitter {
+  MachineFunction *MF;
+  MachineRegisterInfo *MRI;
+  const TargetMachine *TM;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const TargetLowering *TLI;
+
+  MachineBasicBlock *MBB;
+  MachineBasicBlock::iterator InsertPos;
+
+  /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+  /// implicit physical register output.
+  void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+                       bool IsClone, bool IsCloned,
+                       unsigned SrcReg,
+                       DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// getDstOfCopyToRegUse - If the only use of the specified result number of
+  /// node is a CopyToReg, return its destination register. Return 0 otherwise.
+  unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+                                    unsigned ResNo) const;
+
+  void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+                              const TargetInstrDesc &II,
+                              bool IsClone, bool IsCloned,
+                              DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// getVR - Return the virtual register corresponding to the specified result
+  /// of the specified node.
+  unsigned getVR(SDValue Op,
+                 DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// AddRegisterOperand - Add the specified register as an operand to the
+  /// specified machine instr. Insert register copies if the register is
+  /// not in the required register class.
+  void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+                          unsigned IIOpNum,
+                          const TargetInstrDesc *II,
+                          DenseMap<SDValue, unsigned> &VRBaseMap,
+                          bool IsDebug, bool IsClone, bool IsCloned);
+
+  /// AddOperand - Add the specified operand to the specified machine instr. II
+  /// specifies the instruction information for the node, and IIOpNum is the
+  /// operand number (in the II) that we are adding. IIOpNum and II are used for
+  /// assertions only.
+  void AddOperand(MachineInstr *MI, SDValue Op,
+                  unsigned IIOpNum,
+                  const TargetInstrDesc *II,
+                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                  bool IsDebug, bool IsClone, bool IsCloned);
+
+  /// EmitSubregNode - Generate machine code for subreg nodes.
+  ///
+  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+                      bool IsClone, bool IsCloned);
+
+  /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+  /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+  /// register is constrained to be in a particular register class.
+  ///
+  void EmitCopyToRegClassNode(SDNode *Node,
+                              DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+  ///
+  void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+                       bool IsClone, bool IsCloned);
+public:
+  /// CountResults - The results of target nodes have register or immediate
+  /// operands first, then an optional chain, and optional flag operands
+  /// (which do not go into the machine instrs.)
+  static unsigned CountResults(SDNode *Node);
+
+  /// CountOperands - The inputs to target nodes have any actual inputs first,
+  /// followed by an optional chain operand, then flag operands.  Compute
+  /// the number of actual operands that will go into the resulting
+  /// MachineInstr.
+ static unsigned CountOperands(SDNode *Node); + + /// EmitDbgValue - Generate machine instruction for a dbg_value node. + /// + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitNode - Generate machine code for a node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Node->isMachineOpcode()) + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); + else + EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); + } + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. + InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); + +private: + void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); +}; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 0000000..2981cd3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,3410 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::Legalize method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and +/// hacks on it until the target machine can handle it. This involves +/// eliminating value sizes the machine cannot handle (promoting small sizes to +/// large sizes or splitting up large values into small values) as well as +/// eliminating operations the machine cannot handle. +/// +/// This code also does a small amount of optimization and recognition of idioms +/// as part of its processing. 
For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize {
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+  CodeGenOpt::Level OptLevel;
+
+  // Libcall insertion helpers.
+
+  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// legalized.  We use this to ensure that calls are properly serialized
+  /// against each other, including inserted libcalls.
+  SDValue LastCALLSEQ_END;
+
+  /// IsLegalizingCall - This member is used *only* for purposes of providing
+  /// helpful assertions that a libcall isn't created while another call is
+  /// being legalized (which could lead to non-serialized call sequences).
+  bool IsLegalizingCall;
+
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand      // Try to expand this to other ops, otherwise use a libcall.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// value type, where the two bits correspond to the LegalizeAction enum.
+  /// This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDValue, SDValue> LegalizedNodes;
+
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+public:
+  SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+  /// getTypeAction - Return how we should legalize values of this type, either
+  /// it is already legal or we need to expand it into multiple registers of
+  /// smaller integer type, or we need to promote it to a larger type.
+  LegalizeAction getTypeAction(EVT VT) const {
+    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  ///
+  bool isTypeLegal(EVT VT) const {
+    return getTypeAction(VT) == Legal;
+  }
+
+  void LegalizeDAG();
+
+private:
+  /// LegalizeOp - We know that the specified value has a legal type.
+  /// Recursively ensure that the operands have legal types, then return the
+  /// result.
+  SDValue LegalizeOp(SDValue O);
+
+  SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+  /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+  /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+  /// is necessary to spill the vector being inserted into to memory, perform
+  /// the insert there, and then read the result back.
+  SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+                                         SDValue Idx, DebugLoc dl);
+  SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+                                  SDValue Idx, DebugLoc dl);
+
+  /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+  /// performs the same shuffle in terms of order or result bytes, but on a type
+  /// whose vector element type is narrower than the original shuffle type.
+  /// e.g.
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+                                     SDValue N1, SDValue N2,
+                                     SmallVectorImpl<int> &Mask) const;
+
+  bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                    SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+                             DebugLoc dl);
+
+  SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+                                                 SDNode *Node, bool isSigned);
+  SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+                          RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+                          RTLIB::Libcall Call_PPCF128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+                           RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16,
+                           RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64,
+                           RTLIB::Libcall Call_I128);
+
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+  SDValue ExpandBUILD_VECTOR(SDNode *Node);
+  SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+  void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+                                SmallVectorImpl<SDValue> &Results);
+  SDValue ExpandFCOPYSIGN(SDNode *Node);
+  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+                               DebugLoc dl);
+  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+                                DebugLoc dl);
+  SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+                                DebugLoc dl);
+
+  SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+  SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+  SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+  SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+  std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+  void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+  void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order or result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g.
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT,  DebugLoc dl,
+                                                 SDValue N1, SDValue N2,
+                                             SmallVectorImpl<int> &Mask) const {
+  unsigned NumMaskElts = VT.getVectorNumElements();
+  unsigned NumDestElts = NVT.getVectorNumElements();
+  unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+  assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+  if (NumEltsGrowth == 1)
+    return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumMaskElts; ++i) {
+    int Idx = Mask[i];
+    for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+      if (Idx < 0)
+        NewMask.push_back(-1);
+      else
+        NewMask.push_back(Idx * NumEltsGrowth + j);
+    }
+  }
+  assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+  assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+  return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+                                           CodeGenOpt::Level ol)
+  : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+    DAG(dag), OptLevel(ol),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+         "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+  LastCALLSEQ_END = DAG.getEntryNode();
+  IsLegalizingCall = false;
+
+  // The legalize process is inherently a bottom-up recursive process (a
+  // node's operands are legalized before the node itself). Given infinite
+  // stack space, we could just start legalizing on the root and traverse the
+  // whole graph. In practice however, this causes us to run out of stack
+  // space on large basic blocks. To avoid this problem, compute an ordering
+  // of the nodes where each node is only legalized after all of its operands
+  // are legalized.
+  DAG.AssignTopologicalOrder();
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+    LegalizeOp(SDValue(I, 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDValue OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  LegalizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+  if (Node->getOpcode() == ISD::CALLSEQ_END)
+    return Node;
+  if (Node->use_empty())
+    return 0;   // No CallSeqEnd
+
+  // The chain is usually at the end.
+  SDValue TheChain(Node, Node->getNumValues()-1);
+  if (TheChain.getValueType() != MVT::Other) {
+    // Sometimes it's at the beginning.
+    TheChain = SDValue(Node, 0);
+    if (TheChain.getValueType() != MVT::Other) {
+      // Otherwise, hunt for it.
+      for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+        if (Node->getValueType(i) == MVT::Other) {
+          TheChain = SDValue(Node, i);
+          break;
+        }
+
+      // Otherwise, we walked into a node without a chain.
+      if (TheChain.getValueType() != MVT::Other)
+        return 0;
+    }
+  }
+
+  for (SDNode::use_iterator UI = Node->use_begin(),
+       E = Node->use_end(); UI != E; ++UI) {
+
+    // Make sure to only follow users of our token chain.
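+    // (A user may consume several chains; recurse only through operands
+    // that equal TheChain.)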
+    SDNode *User = *UI;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+      if (User->getOperand(i) == TheChain)
+        if (SDNode *Result = FindCallEndFromCallStart(User))
+          return Result;
+  }
+  return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  assert(Node && "Didn't find callseq_start for a call??");
+  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
+/// see if any of them can reach Dest.  If none can, legalize them and N
+/// itself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo.  This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                     SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+  if (N == Dest) return true;  // N certainly leads to Dest :)
+
+  // If we've already processed this node and it does lead to Dest, there is no
+  // need to reprocess it.
+  if (NodesLeadingTo.count(N)) return true;
+
+  // If the first result of this node has already been legalized, then it
+  // cannot reach Dest.
+  if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+  // Okay, this node has not already been legalized.  Check and legalize all
+  // operands.  If none lead to Dest, then we can legalize this node.
+  bool OperandsLeadToDest = false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OperandsLeadToDest |=   // If an operand leads to Dest, so do we.
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+                                   NodesLeadingTo);
+
+  if (OperandsLeadToDest) {
+    NodesLeadingTo.insert(N);
+    return true;
+  }
+
+  // Okay, this node looks safe, legalize it and return false.
+  LegalizeOp(SDValue(N, 0));
+  return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+                                SelectionDAG &DAG, const TargetLowering &TLI) {
+  bool Extend = false;
+  DebugLoc dl = CFP->getDebugLoc();
+
+  // If an FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.  This shrinks FP constants and canonicalizes them for targets where
+  // an FP extending load is the same cost as a normal load (such as on the x87
+  // fp stack or PPC FP unit).
+  EVT VT = CFP->getValueType(0);
+  ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+  if (!UseCP) {
+    assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+    return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+                           (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+  }
+
+  EVT OrigVT = VT;
+  EVT SVT = VT;
+  while (SVT != MVT::f32) {
+    SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+    if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
+        // Only do this if the target has a native EXTLOAD instruction from
+        // smaller type.
+        TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+        TLI.ShouldShrinkFPConstant(OrigVT)) {
+      const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+      LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+      VT = SVT;
+      Extend = true;
+    }
+  }
+
+  SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  if (Extend)
+    return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+                          DAG.getEntryNode(),
+                          CPIdx, PseudoSourceValue::getConstantPool(),
+                          0, VT, false, false, Alignment);
+  return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+                     PseudoSourceValue::getConstantPool(), 0, false, false,
+                     Alignment);
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+                             const TargetLowering &TLI) {
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+  SDValue Val = ST->getValue();
+  EVT VT = Val.getValueType();
+  int Alignment = ST->getAlignment();
+  int SVOffset = ST->getSrcValueOffset();
+  DebugLoc dl = ST->getDebugLoc();
+  if (ST->getMemoryVT().isFloatingPoint() ||
+      ST->getMemoryVT().isVector()) {
+    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (TLI.isTypeLegal(intVT)) {
+      // Expand to a bitconvert of the value to the integer type of the
+      // same size, then a (misaligned) int store.
+      // FIXME: Does not handle truncating floating point stores!
+      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
+      return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
+                          SVOffset, ST->isVolatile(), ST->isNonTemporal(),
+                          Alignment);
+    } else {
+      // Do an (aligned) store to a stack slot, then copy from the stack slot
+      // to the final destination using (unaligned) integer loads and stores.
+      EVT StoredVT = ST->getMemoryVT();
+      EVT RegVT =
+        TLI.getRegisterType(*DAG.getContext(),
+                            EVT::getIntegerVT(*DAG.getContext(),
+                                              StoredVT.getSizeInBits()));
+      unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+      unsigned RegBytes = RegVT.getSizeInBits() / 8;
+      unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+      // Make sure the stack slot is also aligned for the register type.
+      SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+      // Perform the original store, only redirected to the stack slot.
+      SDValue Store = DAG.getTruncStore(Chain, dl,
+                                        Val, StackPtr, NULL, 0, StoredVT,
+                                        false, false, 0);
+      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+      SmallVector<SDValue, 8> Stores;
+      unsigned Offset = 0;
+
+      // Do all but one of the copies using the full register width.
+      for (unsigned i = 1; i < NumRegs; i++) {
+        // Load one integer register's worth from the stack slot.
+        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+                                   false, false, 0);
+        // Store it to the final location.  Remember the store.
+        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+                                      ST->getSrcValue(), SVOffset + Offset,
+                                      ST->isVolatile(), ST->isNonTemporal(),
+                                      MinAlign(ST->getAlignment(), Offset)));
+        // Increment the pointers.
+        Offset += RegBytes;
+        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                               Increment);
+        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+      }
+
+      // The last store may be partial.  Do a truncating store.  On big-endian
+      // machines this requires an extending load from the stack slot to ensure
+      // that the bits are in the right place.
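+      // For example (illustrative): storing a 10-byte value through 4-byte
+      // integer registers copies bytes 0-3 and 4-7 in the loop above, leaving
+      // a final 2-byte (i16) truncating store for bytes 8-9.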
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    8 * (StoredBytes - Offset));
+
+      // Load from the stack slot.
+      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
+                                    NULL, 0, MemVT, false, false, 0);
+
+      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+                                         ST->getSrcValue(), SVOffset + Offset,
+                                         MemVT, ST->isVolatile(),
+                                         ST->isNonTemporal(),
+                                         MinAlign(ST->getAlignment(), Offset)));
+      // The order of the stores doesn't matter - say it with a TokenFactor.
+      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+                         Stores.size());
+    }
+  }
+  assert(ST->getMemoryVT().isInteger() &&
+         !ST->getMemoryVT().isVector() &&
+         "Unaligned store of unknown type.");
+  // Get the half-sized VT.
+  EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+  int NumBits = NewStoredVT.getSizeInBits();
+  int IncrementSize = NumBits / 8;
+
+  // Divide the stored value in two parts.
+  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue Lo = Val;
+  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+  // Store the two parts.
+  SDValue Store1, Store2;
+  Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+                             ST->getSrcValue(), SVOffset, NewStoredVT,
+                             ST->isVolatile(), ST->isNonTemporal(), Alignment);
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+  Alignment = MinAlign(Alignment, IncrementSize);
+  Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+                             ST->getSrcValue(), SVOffset + IncrementSize,
+                             NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+                             Alignment);
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+                            const TargetLowering &TLI) {
+  int SVOffset = LD->getSrcValueOffset();
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  EVT VT = LD->getValueType(0);
+  EVT LoadedVT = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  if (VT.isFloatingPoint() || VT.isVector()) {
+    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+    if (TLI.isTypeLegal(intVT)) {
+      // Expand to a (misaligned) integer load of the same size,
+      // then bitconvert to floating point or vector.
+      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
+                                    SVOffset, LD->isVolatile(),
+                                    LD->isNonTemporal(), LD->getAlignment());
+      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+      if (VT.isFloatingPoint() && LoadedVT != VT)
+        Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+      SDValue Ops[] = { Result, Chain };
+      return DAG.getMergeValues(Ops, 2, dl);
+    } else {
+      // Copy the value to an (aligned) stack slot using (unaligned) integer
+      // loads and stores, then do an (aligned) load from the stack slot.
+      EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+      unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+      unsigned RegBytes = RegVT.getSizeInBits() / 8;
+      unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+      // Make sure the stack slot is also aligned for the register type.
+      SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+      SmallVector<SDValue, 8> Stores;
+      SDValue StackPtr = StackBase;
+      unsigned Offset = 0;
+
+      // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset + Offset, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0, false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, + LD->getSrcValue(), SVOffset + Offset, + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0, MemVT, false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, + NULL, 0, LoadedVT, false, false, 0); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); + } + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. 
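+  // For example (illustrative): an unaligned i32 load on a little-endian
+  // target becomes Lo = ZEXTLOAD:i16 @ Ptr and Hi @ Ptr+2, combined below as
+  // (Hi << 16) | Lo; the low half must be zero-extended so the OR cannot
+  // corrupt the high bits.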
+  if (HiExtType == ISD::NON_EXTLOAD)
+    HiExtType = ISD::ZEXTLOAD;
+
+  // Load the value in two parts.
+  SDValue Lo, Hi;
+  if (TLI.isLittleEndian()) {
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset, NewLoadedVT, LD->isVolatile(),
+                        LD->isNonTemporal(), Alignment);
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+                        LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+  } else {
+    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset, NewLoadedVT, LD->isVolatile(),
+                        LD->isNonTemporal(), Alignment);
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+                        LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+  }
+
+  // Aggregate the two parts.
+  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                           Hi.getValue(1));
+
+  SDValue Ops[] = { Result, TF };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the destination vector to memory, perform the
+/// insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+                               DebugLoc dl) {
+  SDValue Tmp1 = Vec;
+  SDValue Tmp2 = Val;
+  SDValue Tmp3 = Idx;
+
+  // If the target doesn't support this, we have to spill the input vector
+  // to a temporary stack slot, update the element, then reload it.  This is
+  // badness.  We could also load the value into a vector register (either
+  // with a "move to register" or "extload into register" instruction), then
+  // permute it into place, if the idx is a constant and if the idx is
+  // supported by the target.
+  EVT VT = Tmp1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  EVT IdxVT = Tmp3.getValueType();
+  EVT PtrVT = TLI.getPointerTy();
+  SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+  // Store the vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+                            PseudoSourceValue::getFixedStack(SPFI), 0,
+                            false, false, 0);
+
+  // Truncate or zero extend offset to target pointer type.
+  unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+  Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+  // Add the offset to the index.
+  unsigned EltSize = EltVT.getSizeInBits()/8;
+  Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
+                     DAG.getConstant(EltSize, IdxVT));
+  SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+  // Store the scalar value.
+  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
+                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+                         false, false, 0);
+  // Load the updated vector.
+  return DAG.getLoad(VT, dl, Ch, StackPtr,
+                     PseudoSourceValue::getFixedStack(SPFI), 0,
+                     false, false, 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+  if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+    // SCALAR_TO_VECTOR requires that the type of the value being inserted
+    // match the element type of the vector being created, except for
+    // integers, in which case the inserted value can be wider than the
+    // element type.
+    EVT EltVT = Vec.getValueType().getVectorElementType();
+    if (Val.getValueType() == EltVT ||
+        (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+      SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                                  Vec.getValueType(), Val);
+
+      unsigned NumElts = Vec.getValueType().getVectorNumElements();
+      // We generate a shuffle of InVec and ScVec, so the shuffle mask
+      // should be 0,1,2,3,4,5... with the appropriate element replaced with
+      // elt 0 of the RHS.
+      SmallVector<int, 8> ShufOps;
+      for (unsigned i = 0; i != NumElts; ++i)
+        ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+      return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+                                  &ShufOps[0]);
+    }
+  }
+  return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+  // FIXME: We shouldn't do this for TargetConstantFP's.
+  // FIXME: move this to the DAG Combiner!  Note that we can't regress due
+  // to phase ordering between legalized code and the dag combiner.  This
+  // probably means that we need to integrate dag combiner and legalizer
+  // together.
+  // We generally can't do this one for long doubles.
+  SDValue Tmp1 = ST->getChain();
+  SDValue Tmp2 = ST->getBasePtr();
+  SDValue Tmp3;
+  int SVOffset = ST->getSrcValueOffset();
+  unsigned Alignment = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  DebugLoc dl = ST->getDebugLoc();
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+    if (CFP->getValueType(0) == MVT::f32 &&
+        getTypeAction(MVT::i32) == Legal) {
+      Tmp3 = DAG.getConstant(CFP->getValueAPF().
+                                      bitcastToAPInt().zextOrTrunc(32),
+                              MVT::i32);
+      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, isNonTemporal, Alignment);
+    } else if (CFP->getValueType(0) == MVT::f64) {
+      // If this target supports 64-bit registers, do a single 64-bit store.
+      if (getTypeAction(MVT::i64) == Legal) {
+        Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                                 zextOrTrunc(64), MVT::i64);
+        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                            SVOffset, isVolatile, isNonTemporal, Alignment);
+      } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+        // Otherwise, if the target supports 32-bit registers, use two 32-bit
+        // stores.  If the target supports neither 32- nor 64-bit registers,
+        // this xform is certainly not worth it.
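+        // For example, 'store double 1.0' (bit pattern 0x3FF0000000000000)
+        // becomes stores of 0x00000000 (lo word) and 0x3FF00000 (hi word),
+        // swapped on big-endian targets.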
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, isNonTemporal, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(); +} + +/// LegalizeOp - We know that the specified value has a legal type, and +/// that its operands are legal. Now ensure that the operation itself +/// is legal, recursively ensuring that the operands' operations remain +/// legal. +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return Op; + + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert(getTypeAction(Node->getValueType(i)) == Legal && + "Unexpected illegal type!"); + + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + assert((isTypeLegal(Node->getOperand(i).getValueType()) || + Node->getOperand(i).getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; + bool isCustom = false; + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::VAARG: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + EVT OpVT = Node->getOperand(CompareOperand).getValueType(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) { + if (Node->getOpcode() == ISD::SELECT_CC) + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + else + Action = TLI.getOperationAction(Node->getOpcode(), OpVT); + } + break; + } + case ISD::LOAD: + case ISD::STORE: + // FIXME: Model these properly. LOAD and STORE are complicated, and + // STORE expects the unlegalized operand in some cases. 
+ SimpleFinishLegalizing = false; + break; + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + // FIXME: This shouldn't be necessary. These nodes have special properties + // dealing with the recursive nature of legalization. Removing this + // special case should be done as part of making LegalizeDAG non-recursive. + SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! + // FIXME: This really sucks... changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. + SimpleFinishLegalizing = false; + break; + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SmallVector<SDValue, 8> Ops, ResultVals; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + switch (Node->getOpcode()) { + default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. 
+      if (!Ops[2].getValueType().isVector())
+        Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+      break;
+    }
+
+    Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
+                                            Ops.size()), 0);
+    switch (Action) {
+    case TargetLowering::Legal:
+      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+        ResultVals.push_back(Result.getValue(i));
+      break;
+    case TargetLowering::Custom:
+      // FIXME: The handling for custom lowering with multiple results is
+      // a complete mess.
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.getNode()) {
+        for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+          if (e == 1)
+            ResultVals.push_back(Tmp1);
+          else
+            ResultVals.push_back(Tmp1.getValue(i));
+        }
+        break;
+      }
+
+      // FALL THROUGH
+    case TargetLowering::Expand:
+      ExpandNode(Result.getNode(), ResultVals);
+      break;
+    case TargetLowering::Promote:
+      PromoteNode(Result.getNode(), ResultVals);
+      break;
+    }
+    if (!ResultVals.empty()) {
+      for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+        if (ResultVals[i] != SDValue(Node, i))
+          ResultVals[i] = LegalizeOp(ResultVals[i]);
+        AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+      }
+      return ResultVals[Op.getResNo()];
+    }
+  }
+
+  switch (Node->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "NODE: ";
+    Node->dump( &DAG);
+    dbgs() << "\n";
+#endif
+    assert(0 && "Do not know how to legalize this operator!");
+
+  case ISD::BUILD_VECTOR:
+    switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.getNode()) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandBUILD_VECTOR(Result.getNode());
+      break;
+    }
+    break;
+  case ISD::CALLSEQ_START: {
+    SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+    // Recursively legalize all of the inputs of the call end that do not lead
+    // to this call start.  This ensures that any libcalls that need to be
+    // inserted are inserted *before* the CALLSEQ_START.
+    {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+    for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+      LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+                                   NodesLeadingTo);
+    }
+
+    // Now that we have legalized all of the inputs (which may have inserted
+    // libcalls), create the new CALLSEQ_START node.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Merge in the last call to ensure that this call starts after the last
+    // call ended.
+    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+      Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                         Tmp1, LastCALLSEQ_END);
+      Tmp1 = LegalizeOp(Tmp1);
+    }
+
+    // Do not try to legalize the target-specific arguments (#1+).
+    if (Tmp1 != Node->getOperand(0)) {
+      SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+      Ops[0] = Tmp1;
+      Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
+                                              Ops.size()), Result.getResNo());
+    }
+
+    // Remember that the CALLSEQ_START is legalized.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    if (Node->getNumValues() == 2)    // If this has a flag result, remember it.
+      AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+    // Now that the callseq_start and all of the non-call nodes above this call
+    // sequence have been legalized, legalize the call itself.  During this
+    // process, no libcalls can/will be inserted, guaranteeing that no calls
+    // can overlap.
+    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+    // Note that we are selecting this call!
+    LastCALLSEQ_END = SDValue(CallEnd, 0);
+    IsLegalizingCall = true;
+
+    // Legalize the call, starting from the CALLSEQ_END.
+    LegalizeOp(LastCALLSEQ_END);
+    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    return Result;
+  }
+  case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+    // This will cause this node to be legalized as well, and it ensures that
+    // libcalls are handled correctly.
+    if (LastCALLSEQ_END.getNode() != Node) {
+      LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+      DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+      assert(I != LegalizedNodes.end() &&
+             "Legalizing the call start should have legalized this node!");
+      return I->second;
+    }
+
+    // Otherwise, the call start has been legalized and everything is going
+    // according to plan.  Just legalize ourselves normally here.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Do not try to legalize the target-specific arguments (#1+), except for
+    // an optional flag input.
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag) {
+      if (Tmp1 != Node->getOperand(0)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                &Ops[0], Ops.size()),
+                         Result.getResNo());
+      }
+    } else {
+      Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+      if (Tmp1 != Node->getOperand(0) ||
+          Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Ops.back() = Tmp2;
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                &Ops[0], Ops.size()),
+                         Result.getResNo());
+      }
+    }
+    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+    // This finishes up call legalization.
+    IsLegalizingCall = false;
+
+    // If the CALLSEQ_END node has a flag, remember that we legalized it.
+    AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+    if (Node->getNumValues() == 2)
+      AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.getResNo());
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    Tmp1 = LegalizeOp(LD->getChain());   // Legalize the chain.
+    Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    if (ExtType == ISD::NON_EXTLOAD) {
+      EVT VT = Node->getValueType(0);
+      Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                              Tmp1, Tmp2, LD->getOffset()),
+                       Result.getResNo());
+      Tmp3 = Result.getValue(0);
+      Tmp4 = Result.getValue(1);
+
+      switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal:
+        // If this is an unaligned load and the target doesn't support it,
+        // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); + Tmp3 = Result.getOperand(0); + Tmp4 = Result.getOperand(1); + Tmp3 = LegalizeOp(Tmp3); + Tmp4 = LegalizeOp(Tmp4); + } + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Tmp3, DAG); + if (Tmp1.getNode()) { + Tmp3 = LegalizeOp(Tmp1); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + } + break; + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + break; + } + } + // Since loads produce two values, make sure to remember that we + // legalized both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp3); + AddLegalizedOperand(SDValue(Node, 1), Tmp4); + return Op.getResNo() ? Tmp4 : Tmp3; + } else { + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. 
+ Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, + Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + Node->getValueType(0), dl, Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. 
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); + } + } + } + break; + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); + break; + } + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, + Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; + } + } + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(Node); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
+ int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + if (!ST->isTruncatingStore()) { + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + Result = SDValue(OptStore, 0); + break; + } + + { + Tmp3 = LegalizeOp(ST->getValue()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); + + EVT VT = Tmp3.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), + DAG, TLI); + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) Result = Tmp1; + break; + case TargetLowering::Promote: + assert(VT.isVector() && "Unknown legal promote case!"); + Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getSrcValue(), SVOffset, isVolatile, + isNonTemporal, Alignment); + break; + } + break; + } + } else { + Tmp3 = LegalizeOp(ST->getValue()); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, NVT, isVolatile, isNonTemporal, + Alignment); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. + assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, + isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. 
+        // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+        // Store the top RoundWidth bits.
+        Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                         DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+        Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+                               SVOffset, RoundVT, isVolatile, isNonTemporal,
+                               Alignment);
+
+        // Store the remaining ExtraWidth bits.
+        IncrementSize = RoundWidth / 8;
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                           DAG.getIntPtrConstant(IncrementSize));
+        Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                               SVOffset + IncrementSize, ExtraVT, isVolatile,
+                               isNonTemporal,
+                               MinAlign(Alignment, IncrementSize));
+      }
+
+      // The order of the stores doesn't matter.
+      Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+    } else {
+      if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+          Tmp2 != ST->getBasePtr())
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                Tmp1, Tmp3, Tmp2,
+                                                ST->getOffset()),
+                         Result.getResNo());
+
+      switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal:
+        // If this is an unaligned store and the target doesn't support it,
+        // expand it.
+        if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+          const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+          unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+          if (ST->getAlignment() < ABIAlignment)
+            Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                          DAG, TLI);
+        }
+        break;
+      case TargetLowering::Custom:
+        Result = TLI.LowerOperation(Result, DAG);
+        break;
+      case TargetLowering::Expand:
+        // TRUNCSTORE:i16 i32 -> STORE i16
+        assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+        Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+        Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                              SVOffset, isVolatile, isNonTemporal,
+                              Alignment);
+        break;
+      }
+    }
+  }
+  break;
+  }
+  }
+  assert(Result.getValueType() == Op.getValueType() &&
+         "Bad legalization!");
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+  SDValue Vec = Op.getOperand(0);
+  SDValue Idx = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
+                            false, false, 0);
+
+  // Add the offset to the index.
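+  // For example (illustrative): extracting element 2 from a v4i32 vector
+  // scales the index by 4 bytes, giving a byte offset of 8 into the slot.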
+ unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); + else + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + + if (Op.getValueType().isVector()) + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + false, false, 0); + else + return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, + NULL, 0, Vec.getValueType().getVectorElementType(), + false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + // We can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Node->getDebugLoc(); + SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store of each element to the stack slot. + SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + // If the destination vector element type is narrower than the source + // element type, only store the bits necessary. + if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, SV, Offset, + EltVT, false, false, 0)); + } else + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, SV, Offset, + false, false, 0)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + + // Get the sign bit of the RHS. First obtain a value that has the same + // sign as the sign bit, i.e. negative if and only if the sign bit is 1. + SDValue SignBit; + EVT FloatVT = Tmp2.getValueType(); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); + if (isTypeLegal(IVT)) { + // Convert to an integer with the same sign bit. + SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + } else { + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getPointerTy(); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + // Then store the float to it. 
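+    // For example (illustrative): for an f64 sign source with a 32-bit load
+    // type on little-endian, the load below reads the i32 at byte offset 4,
+    // whose top bit is already the sign bit, so BitShift ends up 0.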
+ SDValue Ch = + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0, + false, false, 0); + if (TLI.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0); + } else { // Little endian + SDValue LoadPtr = StackPtr; + // The float may be wider than the integer we are going to load. Advance + // the pointer so that the loaded integer will contain the sign bit. + unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); + unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), + LoadPtr, DAG.getIntPtrConstant(ByteOffset)); + // Load a legal integer containing the sign bit. + SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0); + // Move the sign bit to the top bit of the loaded integer. + unsigned BitShift = LoadTy.getSizeInBits() - + (FloatVT.getSizeInBits() - 8 * ByteOffset); + assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); + if (BitShift) + SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, + DAG.getConstant(BitShift,TLI.getShiftAmountTy())); + } + } + // Now get the sign bit proper, by seeing whether the value is negative. + SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit, DAG.getConstant(0, SignBit.getValueType()), + ISD::SETLT); + // Get the absolute value of the result. + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); + // Select between the nabs and abs value based on the sign bit of + // the input. + return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + DebugLoc dl = Node->getDebugLoc(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + if (Align > StackAlign) + SP = DAG.getNode(ISD::AND, dl, VT, SP, + DAG.getConstant(-(uint64_t)Align, VT)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue()); + + Results.push_back(Tmp1); + Results.push_back(Tmp2); +} + +/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and +/// condition code CC on the current target. This routine expands SETCC with +/// illegal condition code into AND / OR of multiple SETCC values. 
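+/// For example, SETONE may be expanded as (SETNE LHS, RHS) AND (SETO LHS,
+/// RHS); the unordered variants use OR with a SETUO comparison instead.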
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+                                                 SDValue &LHS, SDValue &RHS,
+                                                 SDValue &CC,
+                                                 DebugLoc dl) {
+  EVT OpVT = LHS.getValueType();
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+  default: assert(0 && "Unknown condition code action!");
+  case TargetLowering::Legal:
+    // Nothing to do.
+    break;
+  case TargetLowering::Expand: {
+    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    unsigned Opc = 0;
+    switch (CCCode) {
+    default: assert(0 && "Don't know how to expand this condition!");
+    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+    case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+    case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+    case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+    case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+    case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+    case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+    case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+    case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+    case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+    // FIXME: Implement more expansions.
+    }
+
+    SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+    SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+    RHS = SDValue();
+    CC = SDValue();
+    break;
+  }
+  }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack.  This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed.  It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+                                               EVT SlotVT,
+                                               EVT DestVT,
+                                               DebugLoc dl) {
+  // Create the stack frame object.
+  unsigned SrcAlign =
+    TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+                                              getTypeForEVT(*DAG.getContext()));
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+  unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than the slot type.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                              SV, 0, SlotVT, false, false, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                         SV, 0, false, false, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
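+  // For example (illustrative): a bitconvert of f32 to i32 through a 4-byte
+  // slot has SrcSize == SlotSize == DestSize == 32, so a plain store followed
+  // by a plain load suffices.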
+ if (SlotSize == DestSize) + return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, + DestAlign); + + assert(SlotSize < DestSize && "Unknown extension!"); + return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, + false, false, DestAlign); +} + +SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); + // Create a vector sized/aligned stack slot, store the value to element #0, + // then load the whole vector back out. + SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0)); + + FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr); + int SPFI = StackPtrFI->getIndex(); + + SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), + StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0, + Node->getValueType(0).getVectorElementType(), + false, false, 0); + return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); +} + + +/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't +/// support the operation, but do support the resultant vector type. +SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { + unsigned NumElems = Node->getNumOperands(); + SDValue Value1, Value2; + DebugLoc dl = Node->getDebugLoc(); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + + // If the only non-undef value is the low element, turn this into a + // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. + bool isOnlyLowElement = true; + bool MoreThanTwoValues = false; + bool isConstant = true; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + if (!Value1.getNode()) { + Value1 = V; + } else if (!Value2.getNode()) { + if (V != Value1) + Value2 = V; + } else if (V != Value1 && V != Value2) { + MoreThanTwoValues = true; + } + } + + if (!Value1.getNode()) + return DAG.getUNDEF(VT); + + if (isOnlyLowElement) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); + + // If all elements are constants, create a load from the constant pool. + if (isConstant) { + std::vector<Constant*> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + if (OpVT==EltVT) + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + else { + // If OpVT and EltVT don't match, EltVT is not legal and the + // element values have been promoted/truncated earlier. Undo this; + // we don't want a v16i8 to become a v16i32 for example. 
+ const ConstantInt *CI = V->getConstantIntValue(); + CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), + CI->getZExtValue())); + } + } else { + assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, false, Alignment); + } + + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } + + // Otherwise, we can't handle this case efficiently. + return ExpandVectorBuildThroughStack(Node); +} + +// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
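+// The call is returned as a {result, chain} pair so that callers (such as
+// ExpandAtomic below) can thread the incoming memory chain through the call.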
+std::pair<SDValue, SDValue> +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + return ExpandLibCall(LC, Node, false); +} + +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC = Call_I8; break; + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; + case MVT::i128: LC = Call_I128; break; + } + return ExpandLibCall(LC, Node, isSigned); +} + +/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// INT_TO_FP operation of the specified operand when the target requests that +/// we expand it. At this point, we know that the result and operand types are +/// legal for the target. +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, + SDValue Op0, + EVT DestVT, + DebugLoc dl) { + if (Op0.getValueType() == MVT::i32) { + // simple 32-bit [signed|unsigned] integer to float/double expansion + + // Get the stack frame index of a 8 byte buffer. 
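+    // The expansion below uses a classic double-word trick: writing the
+    // 32-bit integer into one word of the buffer and the bit pattern
+    // 0x43300000 (the exponent of 2^52) into the other yields the double
+    // 2^52 + x exactly, since any 32-bit integer fits in an f64 mantissa.
+    // Subtracting the matching bias (2^52, or 2^52 + 2^31 for the
+    // sign-flipped signed case) then recovers x.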
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); + + // word offset constant for Hi/Lo address computation + SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + // set up Hi and Lo (into buffer) address based on endian + SDValue Hi = StackSlot; + SDValue Lo = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), StackSlot, WordOff); + if (TLI.isLittleEndian()) + std::swap(Hi, Lo); + + // if signed map to unsigned space + SDValue Op0Mapped; + if (isSigned) { + // constant used to invert sign bit (signed to unsigned mapping) + SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32); + Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); + } else { + Op0Mapped = Op0; + } + // store the lo of the constructed double - based on integer input + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, + Op0Mapped, Lo, NULL, 0, + false, false, 0); + // initial hi portion of constructed double + SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + // store the hi of the constructed double - biased exponent + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, + false, false, 0); + // load the constructed double + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, + false, false, 0); + // FP constant to bias correct the final result + SDValue Bias = DAG.getConstantFP(isSigned ? + BitsToDouble(0x4330000080000000ULL) : + BitsToDouble(0x4330000000000000ULL), + MVT::f64); + // subtract the bias + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + // final result + SDValue Result; + // handle final rounding + if (DestVT == MVT::f64) { + // do nothing + Result = Sub; + } else if (DestVT.bitsLT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, + DAG.getIntPtrConstant(0)); + } else if (DestVT.bitsGT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); + } + return Result; + } + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + // Code below here assumes !isSigned without checking again. + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + SDValue TwoP52 = + DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + SDValue TwoP84PlusTwoP52 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + SDValue TwoP84 = + DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + + SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); + SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, + DAG.getConstant(32, MVT::i64)); + SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); + SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, + TwoP84PlusTwoP52); + return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); + } + + // Implementation of unsigned i64 to f32. This implementation has the + // advantage of performing rounding correctly. + // TODO: Generalize this for use with other types. 
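+  // The And/Or/Select sequence below is effectively a sticky "round to odd"
+  // step: for inputs of 2^53 and up, if any of the 11 low bits that the
+  // i64 -> f64 conversion would discard are set, the lowest surviving bit
+  // is forced to 1 so the final FP_ROUND to f32 cannot double-round
+  // incorrectly.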
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + EVT SHVT = TLI.getShiftAmountTy(); + + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, + DAG.getConstant(UINT64_C(0x800), MVT::i64)); + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), + ISD::SETUGE); + SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + + SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, + DAG.getConstant(32, SHVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); + SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); + SDValue TwoP32 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); + SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); + SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, + DAG.getIntPtrConstant(0)); + + } + + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. + uint64_t FF; + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: assert(0 && "Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (TLI.isLittleEndian()) FF <<= 32; + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); + + SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, false, Alignment); + else { + FudgeInReg = + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, + DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + MVT::f32, false, false, Alignment)); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. 
At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, + EVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + EVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { + OpToUse = ISD::UINT_TO_FP; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. Zero extend our input to the + // desired type then run the operation on it. + return DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)); +} + +/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// FP_TO_*INT operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT +/// operation that returns a larger result. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, + EVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate FP_TO*INT operation to use. + EVT NewOutTy = DestVT; + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { + OpToUse = ISD::FP_TO_SINT; + break; + } + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + OpToUse = ISD::FP_TO_UINT; + break; + } + + // Otherwise, try a larger type. + } + + + // Okay, we found the operation and type to use. + SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + + // Truncate the result of the extended FP_TO_*INT operation to the desired + // size. + return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); +} + +/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. 
+/// +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().SimpleTy) { + default: assert(0 && "Unhandled Expand type in BSWAP!"); + case MVT::i16: + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +/// ExpandBitCount - Expand the specified bitcount instruction into operations. +/// +SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, + DebugLoc dl) { + switch (Opc) { + default: assert(0 && "Cannot expand this yet!"); + case ISD::CTPOP: { + static const uint64_t mask[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) + unsigned EltSize = VT.isVector() ? 
+ VT.getVectorElementType().getSizeInBits() : len; + SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), + Tmp2)); + } + return Op; + } + case ISD::CTLZ: { + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); + } + Op = DAG.getNOT(dl, Op, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, Op); + } + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc + EVT VT = Op.getValueType(); + SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getConstant(1, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); + } + } +} + +std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case 
ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTTZ: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + Results.push_back(DAG.getConstant(0, MVT::i32)); + Results.push_back(Node->getOperand(0)); + break; + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + // FIXME: Unimplemented for now. Add libcalls. 
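+  // (ExpandAtomic above already maps ATOMIC_SWAP, ATOMIC_CMP_SWAP and the
+  // fetch-and-{add,sub,and,or,xor,nand} forms onto __sync_* libcalls; the
+  // min/max/umin/umax variants still fall into its llvm_unreachable.)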
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + EVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, VT)); + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); + Results.push_back(DAG.getConstantFP(0, VT)); + } + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + case ISD::FP_ROUND: + case ISD::BIT_CONVERT: + Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + EVT ShiftAmountTy = TLI.getShiftAmountTy(); + if (VT.isVector()) + ShiftAmountTy = VT; + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::FP_ROUND_INREG: { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targetting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. 
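+    // E.g. FP_ROUND_INREG of an f64 to f32 truncstores the operand into an
+    // f32 stack slot, performing the rounding in memory, and then EXTLOADs
+    // it back out as an f64 holding only f32 precision.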
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_UINT: { + SDValue True, False; + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + const uint64_t zero[] = {0, 0}; + APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APInt x = APInt::getSignBit(NVT.getSizeInBits()); + (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); + Tmp1 = DAG.getConstantFP(apf, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Node->getOperand(0), + Tmp1, ISD::SETLT); + True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, + DAG.getNode(ISD::FSUB, dl, VT, + Node->getOperand(0), Tmp1)); + False = DAG.getNode(ISD::XOR, dl, NVT, False, + DAG.getConstant(x, NVT)); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Results.push_back(Tmp1); + break; + } + case ISD::VAARG: { + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-Align, + TLI.getPointerTy())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(TLI.getTargetData()-> + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + TLI.getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, + false, false, 0); + // Load the actual argument out of the pointer VAList + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + false, false, 0)); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::VACOPY: { + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(2), VS, 0, false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, + false, false, 0); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. 
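+      // (A one-element vector has the same bits as its scalar element, so
+      // a BIT_CONVERT suffices.)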
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Node->getOperand(0)); + else + Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); + Results.push_back(Tmp1); + break; + case ISD::EXTRACT_SUBVECTOR: + Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::CONCAT_VECTORS: { + Results.push_back(ExpandVectorBuildThroughStack(Node)); + break; + } + case ISD::SCALAR_TO_VECTOR: + Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); + break; + case ISD::INSERT_VECTOR_ELT: + Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), + Node->getOperand(1), + Node->getOperand(2), dl)); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + if (getTypeAction(EltVT) == Promote) + EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + continue; + } + unsigned Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(0), + DAG.getIntPtrConstant(Idx))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(1), + DAG.getIntPtrConstant(Idx - NumElems))); + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + EVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). 
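+    // The unordered SETUGT compare means a NaN input also selects X itself.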
+ EVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); + Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128)); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_PPCF128)); + break; + case ISD::FP16_TO_FP32: + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + break; + case ISD::FP32_TO_FP16: + Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false)); + break; + case ISD::ConstantFP: { + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); + // Check to see if this FP immediate is already legal. + // If this is a legal constant, turn it into a TargetConstantFP node. 
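+    // Otherwise ExpandConstantFP materializes it, typically as a load from
+    // the constant pool.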
+    if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    EVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X-X/Y*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true,
+                              RTLIB::SREM_I8,
+                              RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false,
+                              RTLIB::UREM_I8,
+                              RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+                         Node->getOperand(1));
+    else if (isSigned)
+      Tmp1 = ExpandIntLibCall(Node, true,
+                              RTLIB::SDIV_I8,
+                              RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+                              RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+    else
+      Tmp1 = ExpandIntLibCall(Node, false,
+                              RTLIB::UDIV_I8,
+                              RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+                              RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::MULHU:
+  case ISD::MULHS: {
+    unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ?
ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::MUL: { + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128); + Results.push_back(Tmp1); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + EVT OType = Node->getValueType(1); + + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(Cmp); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, + Node->getOpcode () == ISD::UADDO ? 
+ ISD::SETULT : ISD::SETUGT)); + break; + } + case ISD::UMULO: + case ISD::SMULO: { + EVT VT = Node->getValueType(0); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue BottomHalf; + SDValue TopHalf; + static const unsigned Ops[2][3] = + { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, + { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; + bool isSigned = Node->getOpcode() == ISD::SMULO; + if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { + BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); + } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { + BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, + RHS); + TopHalf = BottomHalf.getValue(1); + } else { + // FIXME: We should be able to fall back to a libcall with an illegal + // type in some cases. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2)) && + "Don't know how to expand this operation yet!"); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(1)); + } + if (isSigned) { + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); + Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); + TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1, + ISD::SETNE); + } else { + TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, + DAG.getConstant(0, VT), ISD::SETNE); + } + Results.push_back(BottomHalf); + Results.push_back(TopHalf); + break; + } + case ISD::BUILD_PAIR: { + EVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + EVT PTy = TLI.getPointerTy(); + + const TargetData &TD = *TLI.getTargetData(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + Index = DAG.getNode(ISD::MUL, dl, PTy, + Index, DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, + PseudoSourceValue::getJumpTable(), 0, MemVT, + false, false, 0); + Addr = LD; + if 
(TM.getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp2, + DAG.getConstant(0, Tmp2.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + + // If we expanded the SETCC into an AND/OR, return the new node + if (Tmp2.getNode() == 0) { + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. + EVT VT = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + SDValue CC = Node->getOperand(4); + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, dl); + + assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); + + assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + Results.push_back(Tmp1); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! 
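+    // Until that is fixed, the node's own values are pushed back unchanged.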
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); + break; + } +} +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT OVT = Node->getValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC) { + OVT = Node->getOperand(0).getValueType(); + } + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + if (Node->getOpcode() == ISD::CTTZ) { + //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + ISD::SETEQ); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + } else if (Node->getOpcode() == ISD::CTLZ) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + break; + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, TLI.getShiftAmountTy())); + Results.push_back(Tmp1); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); + break; + } + case ISD::SELECT: { + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. 
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0)); + Results.push_back(Tmp1); + break; + } + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. + Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Node->getOperand(2))); + break; + } + } +} + +// SelectionDAG::Legalize - This is the entry point for the file. +// +void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); +} + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 0000000..650ee5a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,1427 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(EVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::ppcf128 ? 
Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Result Float to Integer Conversion. +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften the result of this operator!"); + + case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: + R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetSoftenedFloat(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. 
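+  // E.g. if f64 is being softened to i64, an (f64 build_pair f32:lo, f32:hi)
+  // becomes (i64 build_pair i32:lo, i32:hi), with each half bit-converted to
+  // its integer equivalent (a sketch; the result type actually comes from
+  // getTypeToTransformTo).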
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), + NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), + DAG.getConstant(RSize - 1, + TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy())); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), + DAG.getConstant(LSize - 1, + TLI.getShiftAmountTy())); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. 
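+  // Worked example for an f64 copysign with an f64 sign operand, both
+  // softened to i64 (equal sizes, so neither shift-adjust path above fires):
+  //   SignBit = RHS & 0x8000000000000000
+  //   LHS     = LHS & 0x7FFFFFFFFFFFFFFF
+  //   result  = LHS | SignBit
+  // i.e. the magnitude of the first operand with the sign of the second.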
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special +// nodes? +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, 
&Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), + L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
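+  // Note that the FP_EXTEND built below still produces an illegal float
+  // type; the type legalizer will visit that node again on a later iteration
+  // and soften it in turn (typically into an extend libcall), so only the
+  // chain result is finalized here.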
+  ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+  return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0),
+                     N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1);   // Get the pointer.
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue NewVAARG;
+  NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+                          N->getConstantOperandVal(3));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+  return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+  EVT NVT = EVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+  // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+  // match. Look for an appropriate libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+       t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+    NVT = (MVT::SimpleValueType)t;
+    // The source type needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+  // Sign/zero extend the argument if the libcall takes a larger type.
+  SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                           NVT, N->getOperand(0));
+  return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+                     &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften this operator's operand!"); + + case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break; + case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl) { + SDValue LHSInt = GetSoftenedFloat(NewLHS); + SDValue RHSInt = GetSoftenedFloat(NewRHS); + EVT VT = NewLHS.getValueType(); + + assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+      break;
+    default: assert(false && "Do not know how to soften this setcc!");
+    }
+  }
+
+  // Use the target-specific return value for comparison libcalls.
+  EVT RetVT = TLI.getCmpLibcallReturnType();
+  SDValue Ops[2] = { LHSInt, RHSInt };
+  NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewRHS = DAG.getConstant(0, RetVT);
+  CCCode = TLI.getCmpLibcallCC(LC1);
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+                              NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+                         NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
+  }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+                     GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
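+  // E.g. (br_cc setolt, f32 %a, f32 %b, %dest) ends up, in soft-float form,
+  // roughly as:
+  //   %t = call __ltsf2(%a, %b)
+  //   br_cc setlt, %t, 0, %dest
+  // (a sketch; the actual libcall and condition come from the TLI
+  // comparison-libcall hooks used in SoftenSetCCOperands above).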
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { + EVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { + EVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { + EVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + DebugLoc dl = N->getDebugLoc(); + + if (ST->isTruncatingStore()) + // Do an FP_ROUND followed by a non-truncating store. + Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), + Val, DAG.getIntPtrConstant(0))); + else + Val = GetSoftenedFloat(Val); + + return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Float Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatResult - This method is called when the specified result of the +/// specified node is found to need expansion. 
At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we
+/// just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+  case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
+  case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
+  case ISD::FCEIL:      ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOPYSIGN:  ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+  case ISD::FCOS:       ExpandFloatRes_FCOS(N, Lo, Hi); break;
+  case ISD::FDIV:       ExpandFloatRes_FDIV(N, Lo, Hi); break;
+  case ISD::FEXP:       ExpandFloatRes_FEXP(N, Lo, Hi); break;
+  case ISD::FEXP2:      ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+  case ISD::FFLOOR:     ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+  case ISD::FLOG:       ExpandFloatRes_FLOG(N, Lo, Hi); break;
+  case ISD::FLOG2:      ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+  case ISD::FLOG10:     ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+  case ISD::FMUL:       ExpandFloatRes_FMUL(N, Lo, Hi); break;
+  case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+  case ISD::FNEG:       ExpandFloatRes_FNEG(N, Lo, Hi); break;
+  case ISD::FP_EXTEND:  ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+  case ISD::FPOW:       ExpandFloatRes_FPOW(N, Lo, Hi); break;
+  case ISD::FPOWI:      ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+  case ISD::FRINT:      ExpandFloatRes_FRINT(N, Lo, Hi); break;
+  case ISD::FSIN:       ExpandFloatRes_FSIN(N, Lo, Hi); break;
+  case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+  case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
+  case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+  case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
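+  // (In practice ppcf128 is the only float type expanded this way: Lo/Hi are
+  // the two f64 halves, and SetExpandedFloat records the pair so later uses
+  // of this value can be rewritten in terms of it.)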
+ if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(NVT.getSizeInBits() == integerPartWidth && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[1])), NVT); + Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[0])), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + DebugLoc dl = N->getDebugLoc(); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + DAG.getCondCode(ISD::SETEQ)); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = 
LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32,RTLIB::LOG10_F64, + RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = 
MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + DebugLoc dl = N->getDebugLoc(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + DebugLoc dl = N->getDebugLoc(); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. 
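+  // The tables below hold the f64 bit patterns for 2^32, 2^64 and 2^128;
+  // only the high double of each ppcf128 pair is nonzero. For instance
+  // 0x41f0000000000000 is the IEEE-754 encoding of 4294967296.0 (2^32).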
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + const uint64_t *Parts = 0; + + switch (SrcVT.getSimpleVT().SimpleTy) { + default: + assert(false && "Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APInt(128, 2, Parts)), + MVT::ppcf128)); + Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, DAG.getCondCode(ISD::SETLT)); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) + == TargetLowering::Custom) + Res = TLI.LowerOperation(SDValue(N, 0), DAG); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + EVT VT = NewLHS.getValueType(); + assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. 
We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. + SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + EVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, + N->getOperand(0), DAG.getValueType(MVT::f64)); + Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + EVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; + APFloat APF = APFloat(APInt(128, 2, TwoE31)); + SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. 
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + DAG.getCondCode(ISD::SETGE)); + } + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + StoreSDNode *ST = cast<StoreSDNode>(N); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + ST->getValue().getValueType()); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + SDValue Lo, Hi; + GetExpandedOp(ST->getValue(), Lo, Hi); + + return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp new file mode 100644 index 0000000..f8c5890 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -0,0 +1,2474 @@ +//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer type expansion and promotion for LegalizeTypes. 
+// Promotion is the act of changing a computation in an illegal type into a +// computation in a larger type. For example, implementing i8 arithmetic in an +// i32 register (often needed on powerpc). +// Expansion is the act of changing a computation in an illegal type into a +// computation in two identical registers of a smaller type. For example, +// implementing i64 arithmetic in two i32 registers (often needed on 32-bit +// targets). +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Integer Result Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerResult - This method is called when a result of a node is +/// found to be in need of promotion to a larger type. At this point, the node +/// may also have invalid operands or may have other results that need +/// expansion, we just know that (at least) one result needs promotion. +void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to promote this operator!"); + case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; + case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; + case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; + case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; + case ISD::Constant: Res = PromoteIntRes_Constant(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; + case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; + case ISD::EXTRACT_VECTOR_ELT: + Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; + case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SIGN_EXTEND_INREG: + Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; + case ISD::SRA: Res = PromoteIntRes_SRA(N); break; + case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; + case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; + case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + + case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + + case ISD::SDIV: + case 
ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + + case ISD::UDIV: + case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + + case ISD::SADDO: + case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; + case ISD::UADDO: + case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + } + + // If the result is null then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), N->getChain(), N->getBasePtr(), + Op2, Op3, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + if (NOutVT.bitsEq(NInVT)) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BIT_CONVERT, dl, + NOutVT, GetPromotedInteger(InOp)); + break; + case SoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case ExpandInteger: + case ExpandFloat: + break; + case ScalarizeVector: + // Convert the element to an integer and promote it by hand. 
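+    // E.g. an (i16 bit_convert v1i16) whose result promotes to i32: the
+    // operand scalarizes to its single i16 element, which is then
+    // any_extended to i32 (a sketch of the common case; BitConvertToInteger
+    // is effectively a no-op when the element is already an integer).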
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + case SplitVector: { + // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + } + case WidenVector: + if (OutVT.bitsEq(NInVT)) + // The input is widened to the same size. Convert to the widened value. + return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + } + + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, TLI.getPointerTy())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), JoinIntegers(N->getOperand(0), + N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + EVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + DebugLoc dl = N->getDebugLoc(); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code? + unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && + "can only promote integers"); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. + return DAG.getNode(ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. 
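+ // For example, an i8 ctpop promoted to i32 (a sketch): zero extension + // introduces only zero bits, so ctpop(zext(x)) == ctpop(x) and, unlike + // ctlz and cttz, no post-adjustment of the count is needed.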
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.set(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + return DAG.getNode(ISD::CTTZ, dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) + if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + ISD::AssertZext : ISD::AssertSext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + + return DAG.getNode(ISD::AssertZext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. 
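+ // For example, sign_extend i1 -> i16 where i16 promotes to i32: emit + // sign_extend i1 -> i32 directly and never materialize the illegal i16.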
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + DebugLoc dl = N->getDebugLoc(); + SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT(), N->isVolatile(), + N->isNonTemporal(), N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + DAG.getVTList(ValueVTs, 2), Ops, 2); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { + // Sign extend the input. 
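+ // For example, an i8 sdiv or srem promoted to i32: the quotient or + // remainder of the sign-extended operands always fits in i8, so the + // operation can safely be done in the wider type and truncated later.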
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + assert(isTypeLegal(SVT) && "Illegal SetCC type!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + // Convert to the expected type. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + return DAG.getNode(ISD::SHL, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. + SDValue Res = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), + Res.getValueType(), Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. 
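+ // For example, an i8 srl promoted to i32: zero extending first ensures + // that only zeros are shifted down into the low 8 bits, matching the + // result of the original i8 shift.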
+ EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Res = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res; + + switch (getTypeAction(N->getOperand(0).getValueType())) { + default: llvm_unreachable("Unknown type action!"); + case Legal: + case ExpandInteger: + Res = N->getOperand(0); + break; + case PromoteInteger: + Res = GetPromotedInteger(N->getOperand(0)); + break; + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. 
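+ // Roughly, if the value is passed as two registers P0 and P1 (after the + // big-endian reversal above): Res = zext(P0) | (zext(P1) << RegBits), + // where RegBits is the width of RegVT.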
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), + TLI.getPointerTy())); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + assert(ResNo == 1 && "Only boolean result promotion currently supported!"); + return PromoteIntRes_Overflow(N); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntOp_CONVERT_RNDSAT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::FP16_TO_FP32: + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. 
This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions, but zero extend is cheaper on + // many machines (an AND instead of two shifts), so prefer it. + switch (CCCode) { + default: llvm_unreachable("Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + // ALL of these operations will work if we either sign or zero extend + // the operands (including the unsigned comparisons!). Zero extend is + // usually a simpler/cheaper operation, so prefer it. + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + NewLHS = SExtPromotedInteger(NewLHS); + NewRHS = SExtPromotedInteger(NewRHS); + break; + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { + // This should only occur in unusual situations like bitcasting to an + // x86_fp80, so just turn it into a store+load + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get()); + + // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always + // legal types. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + EVT SVT = TLI.getSetCCResultType(MVT::Other); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + + // The chain (Op#0) and basic block destination (Op#2) are always legal types. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { + // Since the result type is legal, the operands must promote to it. + EVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + DebugLoc dl = N->getDebugLoc(); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + + // Promote the inserted value. 
The type does not need to match the + // vector element type. Check that any extra bits introduced will be + // truncated away. + assert(N->getOperand(0).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + + SmallVector<SDValue, 16> NewOps; + for (unsigned i = 0; i < NumElts; ++i) + NewOps.push_back(GetPromotedInteger(N->getOperand(i))); + + return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && + "can only promote integer arguments"); + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, + unsigned OpNo) { + if (OpNo == 1) { + // Promote the inserted value. This is valid because the type does not + // have to match the vector element type. + + // Check that any extra bits introduced will be truncated away. + assert(N->getOperand(1).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + GetPromotedInteger(N->getOperand(1)), + N->getOperand(2)), + 0); + } + + assert(OpNo == 2 && "Different operand and result vector types?"); + + // Promote the index. + SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { + SDValue NewOps[6]; + DebugLoc dl = N->getDebugLoc(); + NewOps[0] = N->getOperand(0); + for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { + SDValue Flag = GetPromotedInteger(N->getOperand(i)); + NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); + } + return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { + // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote + // the operand in place. + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); + SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); + + return SDValue(DAG.UpdateNodeOperands(N, Cond, + N->getOperand(1), N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); + + // The CC (#4) and the possible return values (#2 and #3) have legal types. 
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), + SVOffset, N->getMemoryVT(), + isVolatile, isNonTemporal, Alignment); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); +} + + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion; we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand the result of this operator!"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. 
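+/// For example, expanding an i64 SHL on a 32-bit target (NVTBits == 32): +///   x << 40  -->  Lo = 0,         Hi = InL << 8 +///   x << 32  -->  Lo = 0,         Hi = InL +///   x << 5   -->  Lo = InL << 5,  Hi = (InH << 5) | (InL >> 27)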
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + EVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + EVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, dl, + NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, dl, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
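+/// (Here 32 stands for NVTBits, the width of one expanded half.) For +/// example, if the amount of an expanded i64 SRL is (x | 32) on a 32-bit +/// target, the high bit of the amount is known to be one, so the result is +/// simply Lo = InH >> (Amt & 31), Hi = 0.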
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getScalarType().getSizeInBits(); + unsigned NVTBits = NVT.getScalarType().getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + +#if 0 + // FIXME: This code is broken for shifts with a zero amount! + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Compute 32-amt. + SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, + DAG.getConstant(NVTBits, ShTy), + Amt); + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(Op1, NVT, InH, Amt), + DAG.getNode(Op2, NVT, InL, Amt2)); + return true; + } +#endif + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. 
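+ // The expansion below computes both candidate results and selects at run + // time on isShort = (Amt < NVTBits). Roughly, for SHL: + //   Lo = isShort ? InL << Amt : 0 + //   Hi = isShort ? (InH << Amt) | (InL >> (NVTBits - Amt)) + //                : InL << (Amt - NVTBits)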
+ SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue LoS, HiS, LoL, HiL; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRL: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRA: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits-1, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + } + + return false; +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + EVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Flag, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + } else { + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). 
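+    // For example, any_extend i32 -> i64 on a 32-bit target: Lo is the i32 +    // operand itself and Hi is undef, since any_extend makes no promise +    // about the contents of the new high bits.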
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); + Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctlz (HiLo) -> Hi != 0 ? 
ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); + DebugLoc dl = N->getDebugLoc(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + EVT MemVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + MemVT, isVolatile, isNonTemporal, Alignment); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. 
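+      // For example, a sextload of i32 into an expanded i64: Hi = Lo >> 31 +      // (arithmetic shift) replicates the sign bit of Lo across the high word.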
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + isVolatile, isNonTemporal, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), + isVolatile, isNonTemporal, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
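+ // For example, an i64 multiply on a 32-bit target where none of the + // MULH*/*MUL_LOHI forms are available becomes a MUL_I64 libcall + // (typically __muldi3 from libgcc or compiler-rt).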
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(Node, 1), Cmp); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. 
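+  // (A *_PARTS node takes the two expanded halves plus the shift amount
+  // and produces both halves of the shifted result, so one illegal-type
+  // shift becomes a single legal-type target operation.)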
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + + SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; + EVT VT = LHSL.getValueType(); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + bool isSigned; + if (N->getOpcode() == ISD::SHL) { + isSigned = false; /*sign irrelevant*/ + if (VT == MVT::i16) + LC = RTLIB::SHL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SHL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SHL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SHL_I128; + } else if (N->getOpcode() == ISD::SRL) { + isSigned = false; + if (VT == MVT::i16) + LC = RTLIB::SRL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRL_I128; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + isSigned = true; + if (VT == MVT::i16) + LC = RTLIB::SRA_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRA_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRA_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRA_I128; + } + + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + return; + } + + if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) + llvm_unreachable("Unsupported shift!"); +} + +void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is sign extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); + // The high part is obtained by SRA'ing all but one of the bits of low part. + unsigned LoSize = NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer:: +ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (EVT.bitsLE(Lo.getValueType())) { + // sext_inreg the low part if needed. 
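+    // (For example, sext_inreg from i8 on an i64 expanded to two i32s:
+    // bit 7 is propagated through the low i32 here, and the SRA below then
+    // splats the resulting sign bit across the whole high i32.)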
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. + unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. 
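+  // (Worked example for i8: 200 + 100 wraps to 44, and 44 u< 200, so the
+  // SETULT above detects the carry; SETUGT likewise detects the borrow
+  // for subtraction.)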
+ ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); + } +} + + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. 
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::RETURNADDR: + case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. + NewLHS = DAG.getNode(ISD::AND, dl, + LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; + } + } + } + + NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); + NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); + NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + return; + } + + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) + if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + NewLHS = LHSHi; + NewRHS = RHSHi; + return; + } + + // FIXME: This generated code sucks. 
+  ISD::CondCode LowCC;
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETLT:
+  case ISD::SETULT: LowCC = ISD::SETULT; break;
+  case ISD::SETGT:
+  case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: LowCC = ISD::SETULE; break;
+  case ISD::SETGE:
+  case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+  }
+
+  // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+  // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+  // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+  // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+  TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+  SDValue Tmp1, Tmp2;
+  Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (!Tmp1.getNode())
+    Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                        LHSLo, RHSLo, LowCC);
+  Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (!Tmp2.getNode())
+    Tmp2 = DAG.getNode(ISD::SETCC, dl,
+                       TLI.getSetCCResultType(LHSHi.getValueType()),
+                       LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+  ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+  ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+  if ((Tmp1C && Tmp1C->isNullValue()) ||
+      (Tmp2C && Tmp2C->isNullValue() &&
+       (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+        CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+      (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+       (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+        CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, return the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+    NewLHS = Tmp2;
+    NewRHS = SDValue();
+    return;
+  }
+
+  NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, ISD::SETEQ, false,
+                             DagCombineInfo, dl);
+  if (!NewLHS.getNode())
+    NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                          LHSHi, RHSHi, ISD::SETEQ);
+  NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+                       NewLHS, Tmp1, Tmp2);
+  NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                        N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        N->getOperand(2), N->getOperand(3),
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+  // The value being shifted is legal, but the shift amount is too big.
+  // It follows that either the result of the shift is undefined, or the
+  // upper half of the shift amount is zero. Just use the lower half.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(1), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant.
+  // That is a problem on 8/16-bit targets, so just truncate the constant
+  // to a valid type.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this SINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+
+  EVT VT = N->getOperand(1).getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch  = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                             N->getMemoryVT(), isVolatile, isNonTemporal,
+                             Alignment);
+  } else if (TLI.isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                      isVolatile, isNonTemporal, Alignment);
+
+    unsigned ExcessBits =
+      N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+    EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+    // Increment the pointer to the other half.
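+    // (IncrementSize is the byte width of one expanded half; MinAlign keeps
+    // only the alignment that is still guaranteed at Ptr + IncrementSize.)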
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+                           SVOffset+IncrementSize, NEVT,
+                           isVolatile, isNonTemporal,
+                           MinAlign(Alignment, IncrementSize));
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  } else {
+    // Big-endian - high bits are at low addresses. Favor aligned stores at
+    // the cost of some bit-fiddling.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    EVT ExtVT = N->getMemoryVT();
+    unsigned EBytes = ExtVT.getStoreSize();
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    unsigned ExcessBits = (EBytes - IncrementSize)*8;
+    EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                                 ExtVT.getSizeInBits() - ExcessBits);
+
+    if (ExcessBits < NVT.getSizeInBits()) {
+      // Transfer high bits from the top of Lo to the bottom of Hi.
+      Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                       DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+                                       TLI.getPointerTy()));
+      Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+                       DAG.getNode(ISD::SRL, dl, NVT, Lo,
+                                   DAG.getConstant(ExcessBits,
+                                                   TLI.getPointerTy())));
+    }
+
+    // Store both the high bits and maybe some of the low bits.
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+                           SVOffset, HiVT, isVolatile, isNonTemporal,
+                           Alignment);
+
+    // Increment the pointer to the other half.
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    // Store the lowest ExcessBits bits in the second half.
+    Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
+                           SVOffset+IncrementSize,
+                           EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                           isVolatile, isNonTemporal,
+                           MinAlign(Alignment, IncrementSize));
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Just truncate the low part of the source.
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unknown FP format");
+  case MVT::f32:     return &APFloat::IEEEsingle;
+  case MVT::f64:     return &APFloat::IEEEdouble;
+  case MVT::f80:     return &APFloat::x87DoubleExtended;
+  case MVT::f128:    return &APFloat::IEEEquad;
+  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+  }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+  EVT DstVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus one.
+  const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+  if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+      TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+    // Do a signed conversion then adjust the result.
+    SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+    SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+    // The result of the signed conversion needs adjusting if the 'sign bit' of
+    // the incoming integer was set. To handle this, we dynamically test to see
+    // if it is set, and, if so, add a fudge factor.
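+    // (Reinterpreting an unsigned N-bit value whose top bit is set as
+    // signed yields value - 2^N, so adding the floating-point constant 2^N
+    // back in restores the intended result; 0x4F800000 and 0x5F800000 below
+    // are the IEEE single-precision bit patterns of 2^32 and 2^64.)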
+ + const uint64_t F32TwoE32 = 0x4F800000ULL; + const uint64_t F32TwoE64 = 0x5F800000ULL; + const uint64_t F32TwoE128 = 0x7F800000ULL; + + APInt FF(32, 0); + if (SrcVT == MVT::i32) + FF = APInt(32, F32TwoE32); + else if (SrcVT == MVT::i64) + FF = APInt(32, F32TwoE64); + else if (SrcVT == MVT::i128) + FF = APInt(32, F32TwoE128); + else + assert(false && "Unsupported UINT_TO_FP!"); + + // Check whether the sign bit is set. + SDValue Lo, Hi; + GetExpandedInteger(Op, Lo, Hi); + SDValue SignSet = DAG.getSetCC(dl, + TLI.getSetCCResultType(Hi.getValueType()), + Hi, DAG.getConstant(0, Hi.getValueType()), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), + TLI.getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + if (TLI.isBigEndian()) std::swap(Zero, Four); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), + FudgePtr, NULL, 0, MVT::f32, + false, false, Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, dl); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 0000000..6e56c98 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1114 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CallingConv.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. 
+ + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. + + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes (by iterating + // over the DAG we never dereference deleted nodes). This means that it may + // also map nodes marked NewNode if the deallocated memory was reallocated as + // another node, and that new node was not seen by the LegalizeTypes machinery + // (for example because it was created but not used). In general, we cannot + // distinguish between new nodes and deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (I->getNodeId() == NewNode) + NewNodes.push_back(I); + + for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { + SDValue Res(I, i); + bool Failed = false; + + unsigned Mapped = 0; + if (ReplacedValues.find(Res) != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
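+        // (The loop below simply follows the ReplacedValues chain to its
+        // end; RemapValue performs the same walk, with path compression,
+        // during normal operation.)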
+        SDValue NewVal = ReplacedValues[Res];
+        DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+        while (I != ReplacedValues.end()) {
+          NewVal = I->second;
+          I = ReplacedValues.find(NewVal);
+        }
+        assert(NewVal.getNode()->getNodeId() != NewNode &&
+               "ReplacedValues maps to a new node!");
+      }
+      if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+        Mapped |= 2;
+      if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+        Mapped |= 4;
+      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+        Mapped |= 8;
+      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+        Mapped |= 16;
+      if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+        Mapped |= 32;
+      if (SplitVectors.find(Res) != SplitVectors.end())
+        Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
+
+      if (I->getNodeId() != Processed) {
+        // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+        // marked NewNode too, since a deleted node may have been reallocated as
+        // another node that has not been seen by the LegalizeTypes machinery.
+        if ((I->getNodeId() == NewNode && Mapped > 1) ||
+            (I->getNodeId() != NewNode && Mapped != 0)) {
+          dbgs() << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+        if (Mapped > 1) {
+          dbgs() << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          dbgs() << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          dbgs() << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          dbgs() << " ReplacedValues";
+        if (Mapped & 2)
+          dbgs() << " PromotedIntegers";
+        if (Mapped & 4)
+          dbgs() << " SoftenedFloats";
+        if (Mapped & 8)
+          dbgs() << " ScalarizedVectors";
+        if (Mapped & 16)
+          dbgs() << " ExpandedIntegers";
+        if (Mapped & 32)
+          dbgs() << " ExpandedFloats";
+        if (Mapped & 64)
+          dbgs() << " SplitVectors";
+        if (Mapped & 128)
+          dbgs() << " WidenedVectors";
+        dbgs() << "\n";
+        llvm_unreachable(0);
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  Dummy.setNodeId(Unanalyzed);
+
+  // The root of the dag may dangle to deleted nodes until the type legalizer is
+  // done. Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+  // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+  // non-leaves.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      I->setNodeId(ReadyToProcess);
+      Worklist.push_back(I);
+    } else {
+      I->setNodeId(Unanalyzed);
+    }
+  }
+
+  // Now that we have a set of nodes to process, handle them all.
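+  // (This is a topological worklist: a node is legalized only after all of
+  // its operands, and finishing a node decrements the pending-operand count
+  // of each of its users at the bottom of the loop below.)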
+ while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + EVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case PromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case ExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case SoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case ExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case ScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case SplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case WidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + EVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". + case PromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case ExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case SoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case ExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case ScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case SplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case WidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. 
+ continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. + ReplaceValueWith(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + dbgs() << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. 
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(I->getOperand(i).getNode()) && + !isTypeLegal(I->getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal!\n"; + Failed = true; + } + + if (I->getNodeId() != Processed) { + if (I->getNodeId() == NewNode) + dbgs() << "New node not analyzed?\n"; + else if (I->getNodeId() == Unanalyzed) + dbgs() << "Unanalyzed node not noticed?\n"; + else if (I->getNodeId() > 0) + dbgs() << "Operand not processed?\n"; + else if (I->getNodeId() == ReadyToProcess) + dbgs() << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + I->dump(&DAG); dbgs() << "\n"; + llvm_unreachable(0); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector<SDValue, 8> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. + NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + NewOps.append(N->op_begin(), N->op_begin() + i); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. 
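+  // (The id of an analyzed node is the number of its operands not yet
+  // processed; ReadyToProcess is zero, so the node joins the worklist
+  // exactly when every operand has been processed.)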
+ N->setNodeId(N->getNumOperands() - NumProcessed); + if (N->getNodeId() == ReadyToProcess) + Worklist.push_back(N); + + return N; +} + +/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// If the node changes to a processed node, then remap it. +void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { + Val.setNode(AnalyzeNewNode(Val.getNode())); + if (Val.getNode()->getNodeId() == Processed) + // We were passed a processed node, or it morphed into one - remap it. + RemapValue(Val); +} + +/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// This can occur when a node is deleted then reallocated as a new node - +/// the mapping in ReplacedValues applies to the deleted node, not the new +/// one. +/// The only map that can have a deleted node as a source is ReplacedValues. +/// Other maps can have deleted nodes as targets, but since their looked-up +/// values are always immediately remapped using RemapValue, resulting in a +/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue +/// always performs correct mappings. In order to keep the mapping correct, +/// ExpungeNode should be called on any new nodes *before* adding them as +/// either source or target to ReplacedValues (which typically means calling +/// Expunge when a new node is first seen, since it may no longer be marked +/// NewNode by the time it is added to ReplacedValues). +void DAGTypeLegalizer::ExpungeNode(SDNode *N) { + if (N->getNodeId() != NewNode) + return; + + // If N is not remapped by ReplacedValues then there is nothing to do. + unsigned i, e; + for (i = 0, e = N->getNumValues(); i != e; ++i) + if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) + break; + + if (i == e) + return; + + // Remove N from all maps - this is expensive but rare. 
+ + for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), + E = PromotedIntegers.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), + E = SoftenedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), + E = ScalarizedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), + E = WidenedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), + E = ReplacedValues.end(); I != E; ++I) + RemapValue(I->second); + + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + ReplacedValues.erase(SDValue(N, i)); +} + +/// RemapValue - If the specified value was already legalized to another value, +/// replace it by that value. +void DAGTypeLegalizer::RemapValue(SDValue &N) { + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); + if (I != ReplacedValues.end()) { + // Use path compression to speed up future lookups if values get multiply + // replaced with other values. + RemapValue(I->second); + N = I->second; + assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + } +} + +namespace { + /// NodeUpdateListener - This class is a DAGUpdateListener that listens for + /// updates to nodes and recomputes their ready state. + class NodeUpdateListener : public SelectionDAG::DAGUpdateListener { + DAGTypeLegalizer &DTL; + SmallSetVector<SDNode*, 16> &NodesToAnalyze; + public: + explicit NodeUpdateListener(DAGTypeLegalizer &dtl, + SmallSetVector<SDNode*, 16> &nta) + : DTL(dtl), NodesToAnalyze(nta) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + // It is possible, though rare, for the deleted node N to occur as a + // target in a map, so note the replacement N -> E in ReplacedValues. + assert(E && "Node not replaced?"); + DTL.NoteDeletion(N, E); + + // In theory the deleted node could also have been scheduled for analysis. + // So remove it from the set of nodes which will be analyzed. + NodesToAnalyze.remove(N); + + // In general nothing needs to be done for E, since it didn't change but + // only gained new uses. However N -> E was just added to ReplacedValues, + // and the result of a ReplacedValues mapping is not allowed to be marked + // NewNode. So if E is marked NewNode, then it needs to be analyzed. 
+      if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+        NodesToAnalyze.insert(E);
+    }
+
+    virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (e.g.) in which case
+      // this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      N->setNodeId(DAGTypeLegalizer::NewNode);
+      NodesToAnalyze.insert(N);
+    }
+  };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+  assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  ExpungeNode(From.getNode());
+  AnalyzeNewValue(To); // Expunges To.
+
+  // Anything that used the old node should now use the new one. Note that this
+  // can potentially cause recursive merging.
+  SmallSetVector<SDNode*, 16> NodesToAnalyze;
+  NodeUpdateListener NUL(*this, NodesToAnalyze);
+  do {
+    DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+    // The old node may still be present in a map like ExpandedIntegers or
+    // PromotedIntegers. Inform maps about the replacement.
+    ReplacedValues[From] = To;
+
+    // Process the list of nodes that need to be reanalyzed.
+    while (!NodesToAnalyze.empty()) {
+      SDNode *N = NodesToAnalyze.back();
+      NodesToAnalyze.pop_back();
+      if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+        // The node was analyzed while reanalyzing an earlier node - it is safe
+        // to skip. Note that this is not a morphing node - otherwise it would
+        // still be marked NewNode.
+        continue;
+
+      // Analyze the node's operands and recalculate the node ID.
+      SDNode *M = AnalyzeNewNode(N);
+      if (M != N) {
+        // The node morphed into a different node. Make everyone use the new
+        // node instead.
+        assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+        assert(N->getNumValues() == M->getNumValues() &&
+               "Node morphing changed the number of results!");
+        for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+          SDValue OldVal(N, i);
+          SDValue NewVal(M, i);
+          if (M->getNodeId() == Processed)
+            RemapValue(NewVal);
+          DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+        }
+        // The original node continues to exist in the DAG, marked NewNode.
+      }
+    }
+    // When recursively updating nodes with new nodes, it is possible to have
+    // new uses of From due to CSE. If this happens, replace the new uses of
+    // From with To.
+ } while (!From.use_empty()); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for softened float"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + "Invalid type for scalarized vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded float"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. 
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't split"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && + 2*Lo.getValueType().getVectorNumElements() == + Op.getValueType().getVectorNumElements() && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for split vector"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + assert(Entry.first.getNode() == 0 && "Node already split"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for widened vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = WidenedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node already widened!"); + OpEntry = Result; +} + + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +/// BitConvertToInteger - Convert to an integer of the same size. +SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { + unsigned BitWidth = Op.getValueType().getSizeInBits(); + return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); +} + +/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the +/// same size. +SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { + assert(Op.getValueType().isVector() && "Only applies to vectors!"); + unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); + unsigned NumElts = Op.getValueType().getVectorNumElements(); + return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); +} + +SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, + EVT DestVT) { + DebugLoc dl = Op.getDebugLoc(); + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, + false, false, 0); + // Result is a load from the stack slot. + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); +} + +/// CustomLowerNode - Replace the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +/// The last parameter is FALSE if we are dealing with a node with legal +/// result types and illegal operand. The second parameter denotes the type of +/// illegal OperandNo in that case. 
+/// The last parameter being TRUE means we are dealing with a +/// node with illegal result types. The second parameter denotes the type of +/// illegal ResNo in that case. +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector<SDValue, 8> Results; + if (LegalizeResult) + TLI.ReplaceNodeResults(N, Results, DAG); + else + TLI.LowerOperationWrapper(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom lower it after all. + return false; + + // Make everything that once used N's values now use those in Results instead. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + ReplaceValueWith(SDValue(N, i), Results[i]); + return true; +} + + +/// CustomWidenLowerNode - Widen the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector<SDValue, 8> Results; + TLI.ReplaceNodeResults(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom widen lower its result after all. + return false; + + // Update the widening map. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + SetWidenedVector(SDValue(N, i), Results[i]); + return true; +} + +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split into two not necessarily identical pieces. +void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { + // Currently all types are split in half. + if (!InVT.isVector()) { + LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + } else { + unsigned NumElements = InVT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); + } +} + +/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and +/// high parts of the given value. +void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Pair.getDebugLoc(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, + SDValue Index) { + DebugLoc dl = Index.getDebugLoc(); + // Make sure the index type is big enough to compute in. + if (Index.getValueType().bitsGT(TLI.getPointerTy())) + Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. 
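+  // Worked example (illustrative): for EltVT = f32 this gives EltSize = 4,
+  // so the arithmetic below produces the address VecPtr + 4 * Index.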
+ + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result DebugLoc + DebugLoc dlHi = Hi.getDebugLoc(); + DebugLoc dlLo = Lo.getDebugLoc(); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + if (NumOps == 0) { + return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); +} + +/// MakeLibCall - Generate a libcall taking the given operands as arguments and +/// returning a result of type RetVT. +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + return CallInfo.first; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { + DebugLoc dl = Bool.getDebugLoc(); + ISD::NodeType ExtendCode; + switch (TLI.getBooleanContents()) { + default: + assert(false && "Unknown BooleanContent!"); + case TargetLowering::UndefinedBooleanContent: + // Extend to VT by adding rubbish bits. + ExtendCode = ISD::ANY_EXTEND; + break; + case TargetLowering::ZeroOrOneBooleanContent: + // Extend to VT by adding zero bits. + ExtendCode = ISD::ZERO_EXTEND; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: { + // Extend to VT by copying the sign bit. 
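+    // (For instance, an i8 boolean holding true as 0xFF sign-extends to
+    // 0xFFFFFFFF as an i32, preserving the all-ones encoding.)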
+    ExtendCode = ISD::SIGN_EXTEND;
+    break;
+  }
+  }
+  return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+                                    EVT LoVT, EVT HiVT,
+                                    SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = Op.getDebugLoc();
+  assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+         Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+  Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+  Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+                   DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+  Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+                                    SDValue &Lo, SDValue &Hi) {
+  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+                                 Op.getValueType().getSizeInBits()/2);
+  SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+  return DAGTypeLegalizer(*this).run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000..d560292
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,742 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. It is a private interface
+// used by the code that implements the SelectionDAG::LegalizeTypes method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+public:
+  // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+  // about the state of the node. The enum lists the possible states.
+  enum NodeIdFlags {
+    /// ReadyToProcess - All operands have been processed, so this node is ready
+    /// to be handled.
+ ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. + Unanalyzed = -2, + + /// Processed - This is a node that has already been processed. + Processed = -3 + + // 1+ - This is a node which has this many unprocessed operands. + }; +private: + enum LegalizeAction { + Legal, // The target natively supports this type. + PromoteInteger, // Replace this integer type with a larger one. + ExpandInteger, // Split this integer type into two of half the size. + SoftenFloat, // Convert this float type to a same size integer type. + ExpandFloat, // Split this float type into two of half the size. + ScalarizeVector, // Replace this one-element vector with its element type. + SplitVector, // Split this vector type into two of half the size. + WidenVector // This vector type should be widened into a larger vector. + }; + + /// ValueTypeActions - This is a bitvector that contains two bits for each + /// simple value type, where the two bits correspond to the LegalizeAction + /// enum from TargetLowering. This can be queried with "getTypeAction(VT)". + TargetLowering::ValueTypeActionImpl ValueTypeActions; + + /// getTypeAction - Return how we should legalize values of this type. + LegalizeAction getTypeAction(EVT VT) const { + switch (ValueTypeActions.getTypeAction(VT)) { + default: + assert(false && "Unknown legalize action!"); + case TargetLowering::Legal: + return Legal; + case TargetLowering::Promote: + // Promote can mean + // 1) For integers, use a larger integer type (e.g. i8 -> i32). + // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). + if (!VT.isVector()) + return PromoteInteger; + return WidenVector; + case TargetLowering::Expand: + // Expand can mean + // 1) split scalar in half, 2) convert a float to an integer, + // 3) scalarize a single-element vector, 4) split a vector in two. + if (!VT.isVector()) { + if (VT.isInteger()) + return ExpandInteger; + if (VT.getSizeInBits() == + TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) + return SoftenFloat; + return ExpandFloat; + } + + if (VT.getVectorNumElements() == 1) + return ScalarizeVector; + return SplitVector; + } + } + + /// isTypeLegal - Return true if this type is legal on this target. + bool isTypeLegal(EVT VT) const { + return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + } + + /// IgnoreNodeResults - Pretend all of this node's results are legal. + bool IgnoreNodeResults(SDNode *N) const { + return N->getOpcode() == ISD::TargetConstant; + } + + /// PromotedIntegers - For integer nodes that are below legal width, this map + /// indicates what promoted value to use. + DenseMap<SDValue, SDValue> PromotedIntegers; + + /// ExpandedIntegers - For integer nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers; + + /// SoftenedFloats - For floating point nodes converted to integers of + /// the same size, this map indicates the converted value to use. + DenseMap<SDValue, SDValue> SoftenedFloats; + + /// ExpandedFloats - For float nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. 
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats; + + /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the + /// scalar value of type 'ty' to use. + DenseMap<SDValue, SDValue> ScalarizedVectors; + + /// SplitVectors - For nodes that need to be split this map indicates + /// which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors; + + /// WidenedVectors - For vector nodes that need to be widened, indicates + /// the widened value to use. + DenseMap<SDValue, SDValue> WidenedVectors; + + /// ReplacedValues - For values that have been replaced with another, + /// indicates the replacement value to use. + DenseMap<SDValue, SDValue> ReplacedValues; + + /// Worklist - This defines a worklist of nodes to process. In order to be + /// pushed onto this worklist, all operands of a node must have already been + /// processed. + SmallVector<SDNode*, 128> Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. + SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); + bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + bool CustomWidenLowerNode(SDNode *N, EVT VT); + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, bool isSigned, + DebugLoc dl); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + void ReplaceValueWith(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). 
+ /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT); + } + + // Integer Result Promotion. + void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. 
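+  // (Illustrative: a typical handler re-extends each promoted operand with
+  // SExtPromotedInteger or ZExtPromotedInteger according to the signedness
+  // of the operation and then rebuilds the node; PromoteSetCCOperands below
+  // does this for comparison operands.)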
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. + void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. 
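+  // (Illustrative: expanding an i64 ADD on a 32-bit target yields an add of
+  // the low halves plus an add-with-carry of the high halves; see
+  // ExpandIntRes_ADDSUB and related handlers declared below.)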
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. 
+ SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. 
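+  // (Illustrative: on a target where ppcf128 is illegal, each handler below
+  // produces the two f64 halves described in the GetExpandedFloat comment.)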
+ void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
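+  // (Illustrative: a v1i32 add becomes an i32 add of the scalarized
+  // elements, with the result recorded via SetScalarizedVector.)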
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_InregOp(SDNode *N); + + SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. + void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
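+  // (Illustrative: a store of a v8i32 whose type is split becomes two v4i32
+  // stores of the Lo and Hi halves at adjacent addresses.)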
+  bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+  SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetWidenedVector - Given a processed vector Op which was widened into a
+  /// larger vector, this method returns the larger vector. The elements of
+  /// the returned vector consist of the elements of Op followed by elements
+  /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+  /// v4i32, then this method returns a v4i32 for which the first two elements
+  /// are the same as those of Op, while the last two elements contain rubbish.
+  SDValue GetWidenedVector(SDValue Op) {
+    SDValue &WidenedOp = WidenedVectors[Op];
+    RemapValue(WidenedOp);
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_SETCC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_POWI(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+  SDValue WidenVecRes_InregOp(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of
+  /// loads to load a vector with a resulting wider type. It takes
+  ///   LdChain: list of chains for the load to be generated.
+  ///   Ld:  load to widen
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                              LoadSDNode *LD);
+
+  /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+  /// loads to load a vector with a resulting wider type. It takes
+  ///   LdChain: list of chains for the load to be generated.
+  ///   Ld:  load to widen
+  ///   ExtType: extension element type
+  SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                 LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// GenWidenVectorStores - Helper function to generate a set of
+  /// stores to store a widened vector into non-widened memory.
+  ///   StChain: list of chains for the stores we have generated
+  ///   ST:  store of a widened value
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+  /// GenWidenVectorTruncStores - Helper function to generate a set of
+  /// truncating stores to store a widened vector into non-widened memory.
+  ///   StChain: list of chains for the stores we have generated
+  ///   ST:  store of a widened value
+  void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                 StoreSDNode *ST);
+
+  /// ModifyToType - Widens or narrows a vector input to a vector of WidenVT.
+  /// The input vector must have the same element type as WidenVT.
+  SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
+  // Generic Splitting: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into two
+  // not necessarily identical types. As such they can be used for splitting
+  // vectors and expanding integers and floats.
+
+  void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isVector())
+      GetSplitVector(Op, Lo, Hi);
+    else if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+  /// which is split (or expanded) into two not necessarily identical pieces.
+  void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+  /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+  /// high parts of the given value.
+  void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+  // Generic Result Splitting.
+  void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Expansion: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into two
+  // identical types of half the size, and that the Lo/Hi part is stored first
+  // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+  // such they can be used for expanding integers and floats.
+
+  void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  // Generic Result Expansion.
+  void ExpandRes_BIT_CONVERT       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_BUILD_PAIR        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_EXTRACT_ELEMENT   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_NormalLoad        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_VAARG             (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  // Generic Operand Expansion.
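+  // (Illustrative: these handle uses of an expanded value; for instance a
+  // store of an illegal i64 becomes two i32 stores in ExpandOp_NormalStore.)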
+ SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 0000000..9c2b1d9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,482 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. + +void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + break; + case SoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case ExpandInteger: + case ExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case SplitVector: + GetSplitVector(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case ScalarizeVector: + // Convert the element instead. 
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case WidenVector: { + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + InOp = GetWidenedVector(InOp); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + InVT.getVectorNumElements()/2); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + } + + if (InVT.isVector() && OutVT.isInteger()) { + // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand + // is legal but the result is not. + EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); + + if (isTypeLegal(NVT)) { + SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, + DAG.getIntPtrConstant(1)); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return; + } + } + + // Lower the bit-convert to a store/load from the stack. + assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); + + // Create the stack frame object. Make sure it is aligned for both + // the source and expanded destination types. + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(NOutVT. + getTypeForEVT(*DAG.getContext())); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + false, false, 0); + + // Load the first half from the stack slot. + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); + + // Increment the pointer to the other half. + unsigned IncrementSize = NOutVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the second half from the stack slot. + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + false, MinAlign(Alignment, IncrementSize)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Return the operands. + Lo = N->getOperand(0); + Hi = N->getOperand(1); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + GetExpandedOp(N->getOperand(0), Lo, Hi); + SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? + Hi : Lo; + + assert(Part.getValueType() == N->getValueType(0) && + "Type twice as big as expanded type not itself expanded!"); + + GetPairElements(Part, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue OldVec = N->getOperand(0); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + // Convert to a vector of the expanded element type, for example + // <3 x i64> -> <6 x i32>. 
+ EVT OldVT = N->getValueType(0); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + EVT::getVectorVT(*DAG.getContext(), + NewVT, 2*OldElts), + OldVec); + + // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. + SDValue Idx = N->getOperand(1); + + // Make sure the type of Idx is big enough to hold the new values. + if (Idx.getValueType().bitsLT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); + DebugLoc dl = N->getDebugLoc(); + + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + isVolatile, isNonTemporal, Alignment); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset+IncrementSize, + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + const unsigned Align = N->getConstantOperandVal(3); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. +//===--------------------------------------------------------------------===// + +SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0).isVector()) { + // An illegal expanding type is being converted to a legal vector type. 
+ // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), + TLI.getTypeToTransformTo(*DAG.getContext(), OVT), + 2); + + if (isTypeLegal(NVT)) { + SDValue Parts[2]; + GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); + return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + } + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + DebugLoc dl = N->getDebugLoc(); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. + std::vector<SDValue> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*DAG.getContext(), + NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = N->getOperand(1); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. 
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + DebugLoc dl = N->getDebugLoc(); + + StoreSDNode *St = cast<StoreSDNode>(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + St->getValue().getValueType()); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + int SVOffset = St->getSrcValueOffset(); + unsigned Alignment = St->getAlignment(); + bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + isVolatile, isNonTemporal, Alignment); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), + SVOffset + IncrementSize, + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. +//===--------------------------------------------------------------------===// + +// Be careful to make no assumptions about which of Lo/Hi is stored first in +// memory (for vectors it is always Lo first followed by Hi in the following +// bytes; for integers and floats it is Lo first if and only if the machine is +// little-endian). + +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // A MERGE_VALUES node can produce any number of values. We know that the + // first illegal one needs to be expanded into Lo/Hi. + unsigned i; + + // The string of legal results gets turned into input operands, which have + // the same type. + for (i = 0; isTypeLegal(N->getValueType(i)); ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + + // The first illegal result must be the one that needs to be expanded. + GetSplitOp(N->getOperand(i), Lo, Hi); + + // Legalize the rest of the results into the input operands whether they are + // legal or not. 
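+ // For example, given (i32, i64, f32) = MERGE_VALUES a, b, c where only the
+ // i64 is illegal: the i32 result was replaced by a above, b was split into
+ // Lo/Hi, and the loop below replaces the f32 result with c.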
+ unsigned e = N->getNumValues(); + for (++i; i != e; ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); + Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 0000000..621c087 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,290 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + const TargetLowering &TLI; + bool Changed; // Keep track of whether anything changed + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. 
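+ /// The map also carries identity entries (a legalized value mapped to
+ /// itself), so a request to legalize an already-legalized value is a no-op.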
+ DenseMap<SDValue, SDValue> LegalizedNodes; + + // Adds a node to the translation cache + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + } + + // Legalizes the given node + SDValue LegalizeOp(SDValue Op); + // Assuming the node is legal, "legalize" the results + SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + // Implements unrolling a VSETCC. + SDValue UnrollVSETCC(SDValue Op); + // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB + // isn't legal. + SDValue ExpandFNEG(SDValue Op); + // Implements vector promotion; this is essentially just bitcasting the + // operands to a different type and bitcasting the result back to the + // original type. + SDValue PromoteVectorOp(SDValue Op); + + public: + bool Run(); + VectorLegalizer(SelectionDAG& dag) : + DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} +}; + +bool VectorLegalizer::Run() { + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
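+ // The cache lookup below also terminates the recursion: values that were
+ // already visited, or were produced by an earlier legalization, are
+ // returned directly.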
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + + SDValue Result = + SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + + bool HasVectorValue = false; + for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); + J != E; + ++J) + HasVectorValue |= J->isVector(); + if (!HasVectorValue) + return TranslateLegalizeResults(Op, Result); + + EVT QueryType; + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::SELECT_CC: + case ISD::VSETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + QueryType = Node->getValueType(0); + break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: + QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + QueryType = Node->getOperand(0).getValueType(); + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + case TargetLowering::Promote: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case TargetLowering::Legal: break; + case TargetLowering::Custom: { + SDValue Tmp1 = TLI.LowerOperation(Op, DAG); + if (Tmp1.getNode()) { + Result = Tmp1; + break; + } + // FALL THROUGH + } + case TargetLowering::Expand: + if (Node->getOpcode() == ISD::FNEG) + Result = ExpandFNEG(Op); + else if (Node->getOpcode() == ISD::VSETCC) + Result = UnrollVSETCC(Op); + else + Result = DAG.UnrollVectorOp(Op.getNode()); + break; + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { + // Vector "promotion" is basically just bitcasting and doing the operation + // in a different type. For example, x86 promotes ISD::AND on v2i32 to + // v1i64. 
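+ // That is, assuming v2i32 promotes to v1i64 as in that example, the code
+ // below produces:
+ //   (v2i32 (and x, y))
+ //     -> (v2i32 (bit_convert (and (v1i64 (bit_convert x)),
+ //                                 (v1i64 (bit_convert y)))))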
+ EVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); +} + +SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { + SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Zero, Op.getOperand(0)); + } + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 8> Ops(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getIntPtrConstant(i)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + LHSElem, RHSElem, CC); + Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); +} + +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 0000000..93bc2d0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,2562 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in two vectors of half the size. For example, implementing +// <128 x f32> operations in terms of two <64 x f32> operations. 
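+// Splitting applies recursively: each half is split again until the pieces
+// reach a vector width the target supports, and a remaining one-element
+// vector is handled by scalarization as described above.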
+// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to scalarize the result of this operator!"); + + case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; + } + + // If R is null, the sub-method took care of registering the result. 
+ if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast<CvtRndSatSDNode>(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + N->getDebugLoc(), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT, + LHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. 
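+ // For example, scalarizing (v1i8 (scalar_to_vector (i32 %x))) produces
+ // (i8 (truncate %x)).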
+ EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + EVT NVT = N->getValueType(0).getVectorElementType(); + EVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2)); + + // VSETCC always returns a sign-extended value, while SETCC may not. The + // SETCC result type may not match the vector element type. Correct these. + if (NVT.bitsLE(SVT)) { + // The SETCC result type is bigger than the vector element type. + // Ensure the SETCC result is sign-extended. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res, + DAG.getValueType(MVT::i1)); + // Truncate to the final type. + return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res); + } + + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res); + // Sign extend to the final type. + return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to scalarize this operator's operand!"); + case ISD::BIT_CONVERT: + Res = ScalarizeVecOp_BIT_CONVERT(N); + break; + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Elt); +} + +/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - +/// use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector<SDValue, 8> Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + &Ops[0], Ops.size()); +} + +/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to +/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the +/// index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), + Res); + return Res; +} + +/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be +/// scalarized, it must be <1 x ty>. Just store the element. 
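+/// For example, a store of <1 x i64> becomes an ordinary i64 store of the
+/// scalarized element; truncating stores likewise become scalar truncating
+/// stores.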
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ case ISD::VSETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+
case ISD::FMUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: + SplitVecRes_BinOp(N, Lo, Hi); + break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + case SoftenFloat: + case ScalarizeVector: + break; + case ExpandInteger: + case ExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + break; + case SplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
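+ // For example, to split the result of (v8i16 (bit_convert (i128 %x))):
+ // split %x into two i64 halves, swap them on big-endian targets, and
+ // bitcast each half back to v4i16.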
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + if (TLI.isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + // Split the input. + SDValue VLo, VHi; + EVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: llvm_unreachable("Unexpected type action!"); + case Legal: { + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), VLo, VHi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
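+ // Any extra lanes added by widening lie beyond the two halves extracted
+ // below and are simply ignored.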
+ SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); + SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); + + Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT IdxVT = Idx.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, + DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + DebugLoc dl = N->getDebugLoc(); + + EVT LoVT, HiVT; + GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + DAG.getValueType(LoVT)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + DAG.getValueType(HiVT)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getIntPtrConstant(IdxVal - LoNumElts)); + return; + } + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(VecType); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + false, false, 0); + + // Load the Lo part from the stack slot. 
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, + false, MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + EVT LoVT, HiVT; + DebugLoc dl = LD->getDebugLoc(); + GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const Value *SV = LD->getSrcValue(); + int SVOffset = LD->getSrcValueOffset(); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + EVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, + SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, + SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc DL = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + EVT InVT = N->getOperand(0).getValueType(); + SDValue LL, LH, RL, RH; + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(0)); + RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. 
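+ // For example, (v8f32 (sint_to_fp (v8i32 %x))) splits into two
+ // v4f32 = sint_to_fp v4i32 operations, one for each half of %x.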
+ EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + EVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: llvm_unreachable("Unexpected type action!"); + case Legal: { + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), Lo, Hi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. + SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + EVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! 
Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + EVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. + SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Split node operand: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to split this operator's operand!"); + + case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = SplitVecOp_UnaryOp(N); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. 
+ if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { + // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. For now, assume the index + // is a constant. + EVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec.getValueType(); + + if (isa<ConstantSDNode>(Idx)) { + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())), 0); + } + + // Store the vector to the stack. + EVT EltVT = VecVT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, + false, false, 0); + + // Load back the required element. 
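+ // GetVectorElementPointer computes StackPtr plus Idx times the element
+ // size in bytes; the any-extending load that follows widens the element
+ // to the result type in case that type was promoted.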
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
+ SV, 0, EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc DL = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ bool isVol = N->isVolatile();
+ bool isNT = N->isNonTemporal();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ LoMemVT, isVol, isNT, Alignment);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVol, isNT, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ HiMemVT, isVol, isNT, Alignment);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ isVol, isNT, Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands must all have the same type, and we know the result
+ // type is valid. Convert this to a BUILD_VECTOR that extracts all the
+ // input elements.
+ // TODO: If the input elements are power-of-two vectors, we could convert
+ // this to a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo))) + return; + + SDValue Res = SDValue(); + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen the result of this operator!"); + + case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); + break; + case ISD::VSETCC: + Res = WidenVecRes_VSETCC(N); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::BSWAP: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; + + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = WidenVecRes_Convert(N); + break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + Res = WidenVecRes_Unary(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + unsigned Opcode = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } + + // No legal vector version so unroll the vector operation and then widen. 
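+ // (Widening a trapping operation would be unsafe: the padding lanes of
+ // the widened operands are undef, so e.g. an integer division could fault
+ // on a lane that does not exist in the original vector.)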
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply the operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take chunks of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeSynthesizable(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ }
+
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+ &Ops[0], NumConcat));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
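+ // For instance (editor's illustration): if the result widens to v2f64
+ // while the already-widened operand is v4i32, the low v2i32 half is
+ // extracted here and only that half is converted.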
+ return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, + InOp, DAG.getIntPtrConstant(0))); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = WidenVT.getVectorElementType(); + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + EVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == WidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), + ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), + cast<VTSDNode>(N->getOperand(1))->getVT() + .getVectorElementType(), + WidenVT.getVectorNumElements()); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + WidenVT, WidenLHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + case SoftenFloat: + case ExpandInteger: + case ExpandFloat: + case ScalarizeVector: + case SplitVector: + break; + case WidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. 
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ if (WidenSize % InSize == 0) {
+ // Determine new input vector type. The new input vector type will use
+ // the same element type (if it's a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeSynthesizable(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector with undef for the new elements.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != WidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widened to the same type.
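+ // (Editor's note: this path is taken when each operand widens to the
+ // same type as the widened result, so an all-undef scan or a two-input
+ // shuffle is cheaper than extracting and rebuilding every element.)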
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts);
+ for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to using extracts and a build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0], NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getIntPtrConstant(0)); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + + EVT InVT = InOp.getValueType(); + + ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + // Check if we can just return the input vector after widening. + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + } + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. 
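+ // Editor's illustration: this fallback handles, for example, a
+ // non-constant extraction index, where no single EXTRACT_SUBVECTOR
+ // applies; elements are pulled out one at a time at Idx, Idx+1, ...
+ // and the widened result is rebuilt with BUILD_VECTOR plus undef padding.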
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Idx.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal+i, IdxVT));
+ } else {
+ Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
+ for (i=1; i < NumElts; ++i) {
+ SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(i, IdxVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
+ }
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
+
+ // We modified the chain - switch anything that used the old chain to
+ // use the new one.
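+ // (Editor's note: a load node has two results - the loaded value in
+ // result 0 and the chain in result 1. Only the chain users are redirected
+ // here; the widened value itself is registered by the caller through
+ // SetWidenedVector.)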
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == WidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "cannot widen non-vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen this operator's operand!"); + + case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = WidenVecOp_Convert(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector<SDValue, 16> Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InWidenVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Check if we can convert between two legal vector types and extract. 
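+ // Editor's illustration (hypothetical types): bit-converting a v2i16
+ // that was widened to v4i16 into an i32 can bitcast the v4i16 to v2i32
+ // and extract element 0, provided v2i32 is legal on the target.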
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ if (InWidenSize % Size == 0 && !VT.isVector()) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeSynthesizable(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenate vector with a
+ // nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value but we only want to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+ MVT::Other, &StChain[0], StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widen vector for load/store
+// TLI: Target lowering used to determine legal types.
+// Width: Width remaining to load/store.
+// WidenVT: The widen vector type to load to/store from
+// Align: If 0, don't allow use of a wider type
+// WidenEx: If Align is not 0, the additional amount we may load/store beyond
+// Width.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
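+ // Worked example (editor's illustration, assuming a target where v2i32
+ // is legal but i64 is not, and no wider over-read is permitted): for
+ // Width == 96 and WidenVT == v4i32 this returns v2i32; a second call
+ // with Width == 32 then returns the i32 element type.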
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is a larger legal integer type than the element type to
+ // load/store.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same
+ // vector element type and whose width evenly divides the width of WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector type from scalar loads
+// VecTy: Resulting Vector type
+// LdOps: Load operations used to build the vector
+// [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ // Readjust the insert position based on the new load type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD) {
+ // The strategy assumes that we can efficiently load powers of two widths.
+ // The routine chops the vector into the largest vector loads with the same
+ // element type, or scalar loads, and then recombines them into the widen
+ // vector type.
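+ // Worked example (editor's illustration): a v3i32 load (96 bits) that
+ // widens to v4i32 might become one 64-bit load followed by one 32-bit
+ // load at offset 8, with the pieces recombined below into the final
+ // v4i32 value.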
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ const Value *SV = LD->getSrcValue();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+
+ // Find the vector type we can load with.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the value with a single instruction.
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ }
+
+ // Load the vector by using multiple loads, from the largest vector type
+ // down to scalar.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (LdWidth < NewVTWidth) {
+ // The current type we are using is too large; find a smaller one.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ }
+
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
+ SVOffset+Offset, isVolatile,
+ isNonTemporal, MinAlign(Align, Increment));
+ LdChain.push_back(LdOp.getValue(1));
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the load contains vectors, build the vector using concat vector.
+ // All of the vectors used for the loads are a power of 2 wide, and the
+ // scalar loads can be combined to make a power-of-2 vector.
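+ // Editor's example: if LdOps holds {v2i32, i32, i32}, the trailing
+ // scalar loads are first combined into a v2i32 by BuildVectorFromScalar,
+ // and the two v2i32 pieces are then concatenated into the widened
+ // vector.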
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads into a vector.
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector.
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector.
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0], NumOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ const Value *SV = LD->getSrcValue();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen.
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
+ LdEltVT, isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
+ SVOffset + Offset, LdEltVT, isVolatile,
+ isNonTemporal, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs.
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store powers of two widths.
+ // The routine chops the vector into the largest vector stores with the same
+ // element type, or scalar stores.
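+ // Editor's example: storing a v3i32 value (held in the DAG as a widened
+ // v4i32) might be emitted as one v2i32 store of elements 0-1 followed by
+ // one i32 store of element 2, so no bytes beyond the original 96 bits
+ // are written.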
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with.
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store.
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ // Readjust the index position based on the new vector type.
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ isNonTemporal, MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore the index back to being relative to the original widen
+ // element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For truncating stores, it may not be more efficient to truncate the
+ // vector and then store it. Instead, we extract each element and then
+ // store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be the case that the widen vector type is bigger than the type
+ // we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we can not play the tricks of chopping legal
+ // vector types and bit cast it to the right type. Instead, we unroll
+ // the store.
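+ // Editor's example: trunc-storing a v4i32 value as v4i8 extracts each of
+ // the four i32 elements and emits four one-byte truncating stores at
+ // consecutive addresses, rather than forming an illegal v4i8 value in a
+ // register.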
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ // Elements of the stored vector are laid out at StEltVT-sized intervals.
+ unsigned Increment = StEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset, StEltVT,
+ isVolatile, isNonTemporal, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+ SVOffset + Offset, StEltVT,
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+ // Note that InOp might have been widened so it might already have
+ // the right width or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/Makefile b/contrib/llvm/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..ea716fd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG + +include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h new file mode 100644 index 0000000..ac2d338 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -0,0 +1,114 @@ +//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H +#define LLVM_CODEGEN_SDNODEDBGVALUE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/System/DataTypes.h" + +namespace llvm { + +class MDNode; +class SDNode; +class Value; + +/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// We do not use SDValue here to avoid including its header. + +class SDDbgValue { +public: + enum DbgValueKind { + SDNODE = 0, // value is the result of an expression + CONST = 1, // value is a constant + FRAMEIX = 2 // value is contents of a stack location + }; +private: + enum DbgValueKind kind; + union { + struct { + SDNode *Node; // valid for expressions + unsigned ResNo; // valid for expressions + } s; + const Value *Const; // valid for constants + unsigned FrameIx; // valid for stack objects + } u; + MDNode *mdPtr; + uint64_t Offset; + DebugLoc DL; + unsigned Order; + bool Invalid; +public: + // Constructor for non-constants. + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + Invalid(false) { + kind = SDNODE; + u.s.Node = N; + u.s.ResNo = R; + } + + // Constructor for constants. + SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, + unsigned O) : + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + kind = CONST; + u.Const = C; + } + + // Constructor for frame indices. + SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + kind = FRAMEIX; + u.FrameIx = FI; + } + + // Returns the kind. + DbgValueKind getKind() { return kind; } + + // Returns the MDNode pointer. + MDNode *getMDPtr() { return mdPtr; } + + // Returns the SDNode* for a register ref + SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; } + + // Returns the ResNo for a register ref + unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } + + // Returns the Value* for a constant + const Value *getConst() { assert (kind==CONST); return u.Const; } + + // Returns the FrameIx for a stack object + unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + + // Returns the offset. + uint64_t getOffset() { return Offset; } + + // Returns the DebugLoc. + DebugLoc getDebugLoc() { return DL; } + + // Returns the SDNodeOrder. This is the order of the preceding node in the + // input. + unsigned getOrder() { return Order; } + + // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + // property. A SDDbgValue is invalid if the SDNode that produces the value is + // deleted. 
+ void setIsInvalidated() { Invalid = true; } + bool isInvalidated() { return Invalid; } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h new file mode 100644 index 0000000..f88b26d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -0,0 +1,54 @@ +//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDNodeOrdering class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDNODEORDERING_H +#define LLVM_CODEGEN_SDNODEORDERING_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class SDNode; + +/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each +/// SDNode that roughly corresponds to the ordering of the original LLVM +/// instruction. This is used for turning off scheduling, because we'll forgo +/// the normal scheduling algorithms and output the instructions according to +/// this ordering. +class SDNodeOrdering { + DenseMap<const SDNode*, unsigned> OrderMap; + + void operator=(const SDNodeOrdering&); // Do not implement. + SDNodeOrdering(const SDNodeOrdering&); // Do not implement. +public: + SDNodeOrdering() {} + + void add(const SDNode *Node, unsigned O) { + OrderMap[Node] = O; + } + void remove(const SDNode *Node) { + DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node); + if (Itr != OrderMap.end()) + OrderMap.erase(Itr); + } + void clear() { + OrderMap.clear(); + } + unsigned getOrder(const SDNode *Node) { + return OrderMap[Node]; + } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 0000000..fae2729 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,636 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+ /// LiveRegDefs - A set of physical registers and their defining nodes
+ /// that are "live". These nodes must be scheduled before any other node
+ /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ /// AddPred - Adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+ /// RemovePred - Removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. 
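+/// (Editor's note: the bottom-up list scheduler calls this when a node
+/// would clobber a live physical register; duplicating or unfolding the
+/// computation, when legal, avoids inserting cross-register-class copies
+/// via InsertCopiesAndMoveSuccs.)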
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  if (N->getFlaggedNode())
+    return NULL;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT == MVT::Flag)
+      return NULL;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = N->getOperand(i);
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (VT == MVT::Flag)
+      return NULL;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return NULL;
+
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    SUnit *NewSU = NewSUnit(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NewSU->NodeNum);
+
+    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+        NewSU->isTwoAddress = true;
+        break;
+      }
+    }
+    if (TID.isCommutable())
+      NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
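+    // The load's SUnit is reused when LoadNode already carries a NodeId;
+    // otherwise a fresh SUnit is created for it below.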
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
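+  // The edges to move are collected in DelDeps first and removed afterwards;
+  // removing them while walking SU->Succs would invalidate the iterator.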
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { + // Check for def of register or earlyclobber register. 
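+          // Each inline-asm flag word is followed by NumVals register
+          // operands; any physical register defined here must be checked
+          // against the live set.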
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            if (TargetRegisterInfo::isPhysicalRegister(Reg))
+              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+          }
+        } else
+          i += NumVals;
+      }
+      continue;
+    }
+    if (!Node->isMachineOpcode())
+      continue;
+    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+    if (!TID.ImplicitDefs)
+      continue;
+    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+    }
+  }
+  return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU, CurCycle);
+
+  // Add root to Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue.push(RootSU);
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back. Schedule the node.
+  SmallVector<SUnit*, 4> NotReady;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty()) {
+    bool Delayed = false;
+    LRegsMap.clear();
+    SUnit *CurSU = AvailableQueue.pop();
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      Delayed = true;
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+      CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue.pop();
+    }
+
+    // All candidates are delayed due to live physical reg dependencies.
+    // Try code duplication or inserting cross class copies
+    // to resolve it.
+    if (Delayed && !CurSU) {
+      if (!CurSU) {
+        // Try duplicating the nodes that produce these
+        // "expensive to copy" values to break the dependency. In case even
+        // that doesn't work, insert cross class copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getMinimalPhysRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
+        if (!NewDef) {
+          // Issue copies; these can be expensive cross register class copies.
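+          // The copy pair produced by InsertCopiesAndMoveSuccs reroutes all
+          // already-scheduled readers of the physical register through a
+          // cross-class copy so the blocked candidate can be scheduled.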
+ SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + if (!CurSU) { + llvm_unreachable("Unable to resolve live physical register dependencies!"); + } + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue.push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order since it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/true); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp new file mode 100644 index 0000000..56f5ded --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -0,0 +1,268 @@ +//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class ScheduleDAGList : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + +public: + ScheduleDAGList(MachineFunction &mf, + SchedulingPriorityQueue *availqueue, + ScheduleHazardRecognizer *HR) + : ScheduleDAGSDNodes(mf), + AvailableQueue(availqueue), HazardRec(HR) { + } + + ~ScheduleDAGList() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleaseSucc(SUnit *SU, const SDep &D); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DEBUG(dbgs() << "********** List Scheduling **********\n"); + + // Build the scheduling graph. + BuildSchedGraph(NULL); + + AvailableQueue->initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors. 
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, *I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. 
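+      // Advancing the cycle lets the hazard recognizer age its internal
+      // state, so a candidate that stalled this cycle may become legal on a
+      // later iteration.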
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler with a +/// new hazard recognizer. This scheduler takes ownership of the hazard +/// recognizer and deletes it when done. +ScheduleDAGSDNodes * +llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGList(*IS->MF, + new LatencyPriorityQueue(), + IS->CreateTargetHazardRecognizer()); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 0000000..4c3e4e3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,2019 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+  burrListDAGScheduler("list-burr",
+                       "Bottom-up register reduction list scheduling",
+                       createBURRListDAGScheduler);
+static RegisterScheduler
+  tdrListrDAGScheduler("list-tdrr",
+                       "Top-down register reduction list scheduling",
+                       createTDRRListDAGScheduler);
+static RegisterScheduler
+  sourceListDAGScheduler("source",
+                         "Similar to list-burr but schedules in source "
+                         "order when possible",
+                         createSourceListDAGScheduler);
+
+static RegisterScheduler
+  hybridListDAGScheduler("list-hybrid",
+                         "Bottom-up register pressure aware list scheduling "
+                         "which tries to balance latency and register pressure",
+                         createHybridListDAGScheduler);
+
+static RegisterScheduler
+  ILPListDAGScheduler("list-ilp",
+                      "Bottom-up register pressure aware list scheduling "
+                      "which tries to balance ILP and register pressure",
+                      createILPListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+  /// it is top-down.
+  bool isBottomUp;
+
+  /// NeedLatency - True if the scheduler will make use of latency information.
+  ///
+  bool NeedLatency;
+
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// LiveRegDefs - A map from physical registers to the nodes that define
+  /// them while they are "live". These nodes must be scheduled before any
+  /// other node that modifies the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
+  /// Topo - A topological ordering for SUnits which permits fast IsReachable
+  /// and similar queries.
+  ScheduleDAGTopologicalSort Topo;
+
+public:
+  ScheduleDAGRRList(MachineFunction &mf,
+                    bool isbottomup, bool needlatency,
+                    SchedulingPriorityQueue *availqueue)
+    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
+      AvailableQueue(availqueue), Topo(SUnits) {
+  }
+
+  ~ScheduleDAGRRList() {
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+  /// IsReachable - Checks if SU is reachable from TargetSU.
+  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+    return Topo.IsReachable(SU, TargetSU);
+  }
+
+  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+  /// create a cycle.
+  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+    return Topo.WillCreateCycle(SU, TargetSU);
+  }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+  void AddPred(SUnit *SU, const SDep &D) {
+    Topo.AddPred(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    Topo.RemovePred(SU, D.getSUnit());
+    SU->removePred(D);
+  }
+
+private:
+  void ReleasePred(SUnit *SU, const SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+  void ReleaseSuccessors(SUnit *SU);
+  void CapturePred(SDep *PredEdge);
+  void ScheduleNodeBottomUp(SUnit*, unsigned);
+  void ScheduleNodeTopDown(SUnit*, unsigned);
+  void UnscheduleNodeBottomUp(SUnit*);
+  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+  void ListScheduleTopDown();
+  void ListScheduleBottomUp();
+
+  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+  /// Updates the topological ordering if required.
+  SUnit *CreateNewSUnit(SDNode *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = NewSUnit(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// CreateClone - Creates a new SUnit from an existing one.
+  /// Updates the topological ordering if required.
+  SUnit *CreateClone(SUnit *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = Clone(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+  /// need actual latency information but the hybrid scheduler does.
+  bool ForceUnitLatencies() const {
+    return !NeedLatency;
+  }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+  DEBUG(dbgs()
+        << "********** List Scheduling BB#" << BB->getNumber()
+        << " **********\n");
+
+  NumLiveRegs = 0;
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+  // Build the scheduling graph.
+  BuildSchedGraph(NULL);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+  Topo.InitDAGTopologicalSorting();
+
+  AvailableQueue->initNodes(SUnits);
+
+  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+
+  AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+  if (!ForceUnitLatencies()) {
+    // Updating predecessor's height. This is now the cycle when the
+    // predecessor can be scheduled without causing a pipeline stall.
+    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+  }
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+    AvailableQueue->push(PredSU);
+  }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (!LiveRegDefs[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegDefs[I->getReg()] = I->getSUnit();
+        LiveRegCycles[I->getReg()] = CurCycle;
+      }
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+  if (CurCycle < SU->getHeight())
+    DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
+#endif
+
+  // FIXME: Handle noop hazard.
+  SU->setHeightToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  AvailableQueue->ScheduledNode(SU);
+
+  ReleasePredecessors(SU, CurCycle);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[I->getReg()] == SU &&
+               "Physical register dependency violated?");
+        --NumLiveRegs;
+        LiveRegDefs[I->getReg()] = NULL;
+        LiveRegCycles[I->getReg()] = 0;
+      }
+    }
+  }
+
+  SU->isScheduled = true;
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
+/// its and its predecessors' state to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(SU->dump(this));
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(&*I);
+    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+             "Physical register dependency violated?");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegCycles[I->getReg()] = 0;
+    }
+  }
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (!LiveRegDefs[I->getReg()]) {
+        LiveRegDefs[I->getReg()] = SU;
+        ++NumLiveRegs;
+      }
+      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+    }
+  }
+
+  SU->setHeightDirty();
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  AvailableQueue->push(SU);
+  AvailableQueue->UnscheduledNode(SU);
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
+/// BtCycle in order to schedule a specific node.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
+                                          unsigned &CurCycle) {
+  SUnit *OldSU = NULL;
+  while (CurCycle > BtCycle) {
+    OldSU = Sequence.back();
+    Sequence.pop_back();
+    if (SU->isSucc(OldSU))
+      // Don't try to remove SU from AvailableQueue.
+      SU->isAvailable = false;
+    UnscheduleNodeBottomUp(OldSU);
+    --CurCycle;
+    AvailableQueue->setCurCycle(CurCycle);
+  }
+
+  assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+  ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getFlaggedNode()) {
+    if (SUNode->isOperandOf(N))
+      return true;
+  }
+  return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  if (N->getFlaggedNode())
+    return NULL;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT == MVT::Flag)
+      return NULL;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = N->getOperand(i);
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (VT == MVT::Flag)
+      return NULL;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return NULL;
+
+    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + ComputeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + ComputeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (isOperandOf(I->getSUnit(), LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
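+  // Unlike the fast scheduler, AddPred/RemovePred here also keep Topo in
+  // sync, so IsReachable and WillCreateCycle queries stay valid while the
+  // edges are rewired.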
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(NULL); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(NULL); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. 
+/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + SUnit *TrySU = NotReady[i]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
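+        // LiveCycle ends up as the earliest cycle at which any blocking
+        // register became live; backtracking to it unschedules every node
+        // issued since that cycle.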
+        unsigned LiveCycle = CurCycle;
+        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+          unsigned Reg = LRegs[j];
+          unsigned LCycle = LiveRegCycles[Reg];
+          LiveCycle = std::min(LiveCycle, LCycle);
+        }
+        SUnit *OldSU = Sequence[LiveCycle];
+        if (!WillCreateCycle(TrySU, OldSU)) {
+          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+          // Force the current node to be scheduled before the node that
+          // requires the physical reg dep.
+          if (OldSU->isAvailable) {
+            OldSU->isAvailable = false;
+            AvailableQueue->remove(OldSU);
+          }
+          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false, /*isArtificial=*/true));
+          // If one or more successors have been unscheduled, then the current
+          // node is no longer available. Schedule a successor that's now
+          // available instead.
+          if (!TrySU->isAvailable)
+            CurSU = AvailableQueue->pop();
+          else {
+            CurSU = TrySU;
+            TrySU->isPending = false;
+            NotReady.erase(NotReady.begin()+i);
+          }
+          break;
+        }
+      }
+
+      if (!CurSU) {
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // duplicating the nodes that produce these "too expensive to copy"
+        // values to break the dependency. In case even that doesn't work,
+        // insert cross class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getMinimalPhysRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
+        if (!NewDef) {
+          // Issue copies; these can be expensive cross register class copies.
+          SmallVector<SUnit*, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+                       << " to SU #" << Copies.front()->NodeNum << "\n");
+          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false,
+                              /*isArtificial=*/true));
+          NewDef = Copies.back();
+        }
+
+        DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+                     << " to SU #" << TrySU->NodeNum << "\n");
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                             /*Reg=*/0, /*isNormalMemory=*/false,
+                             /*isMustAlias=*/false,
+                             /*isArtificial=*/true));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      assert(CurSU && "Unable to resolve live physical register dependencies!");
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue->push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+    AvailableQueue->setCurCycle(CurCycle);
+  }
+
+  // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + +void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-tdrr scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGRRList::ListScheduleTopDown() { + unsigned CurCycle = 0; + AvailableQueue->setCurCycle(CurCycle); + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + SUnit *CurSU = AvailableQueue->pop(); + + if (CurSU) + ScheduleNodeTopDown(CurSU, CurCycle); + ++CurCycle; + AvailableQueue->setCurCycle(CurCycle); + } + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { + template<class SF> + class RegReductionPriorityQueue; + + /// bu_ls_rr_sort - Priority function for bottom up register pressure + // reduction scheduler. 
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; + bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + // td_ls_rr_sort - Priority function for top down register pressure reduction + // scheduler. + struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; + td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + // src_ls_rr_sort - Priority function for source order scheduler. + struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; + src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) + : SPQ(spq) {} + src_ls_rr_sort(const src_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + // hybrid_ls_rr_sort - Priority function for hybrid scheduler. + struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; + hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) + // scheduler. + struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; + ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { + unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +namespace { + template<class SF> + class RegReductionPriorityQueue : public SchedulingPriorityQueue { + std::vector<SUnit*> Queue; + SF Picker; + unsigned CurQueueId; + bool TracksRegPressure; + + protected: + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. 
+ std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. + std::vector<unsigned> RegLimit; + + public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + } + } + + void initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + } + + void addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return 0; + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. it doesn't produce a value + // that would be consumed (e.g. store), then it terminates a chain of + // computation. Give it a large SethiUllman number so it will be + // scheduled right before its predecessors that it doesn't lengthen + // their live ranges. + return 0xffff; + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. 
+ return 0; + return SethiUllmanNumbers[SU->NodeNum]; + } + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return NULL; + std::vector<SUnit *>::iterator Best = Queue.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + Queue.pop_back(); + V->NodeQueueId = 0; + return V; + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; // Reg pressure already high. + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if (!PN->hasAnyUseOfValue(i)) + continue; + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. 
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + } + + return false; + } + + void ScheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. 
+ RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); + } + + void UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. 
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ void dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+ }
+
+ protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+ };
+
+ typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+ BURegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<td_ls_rr_sort>
+ TDRegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<src_ls_rr_sort>
+ SrcRegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+ HybridBURRPriorityQueue;
+
+ typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ ILPBURRPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be
+ // considered to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+template <typename RRSort>
+static bool BURRSort(const SUnit *left, const SUnit *right,
+ const RegReductionPriorityQueue<RRSort> *SPQ) {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+ // How many registers become live when the node is scheduled.
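+ // (Illustrative note, not from the original source: calcMaxScratches above
+ // serves as the proxy; it counts the node's data predecessors, each of
+ // which must occupy a register when the node issues.)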
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+bool hybrid_ls_rr_sort::operator()(const SUnit *left,
+ const SUnit *right) const {
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh)
+ return true;
+ else if (!LHigh && RHigh)
+ return false;
+ else if (!LHigh && !RHigh) {
+ // Low register pressure situation, schedule for latency if possible.
+ bool LStall = left->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < left->getHeight();
+ bool RStall = right->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < right->getHeight();
+ // If scheduling exactly one of the nodes will cause a pipeline stall,
+ // delay that one. If scheduling both would stall, sort them by height.
+ // If neither will cause a pipeline stall, try to reduce register
+ // pressure.
+ if (LStall) {
+ if (!RStall)
+ return true;
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ } else if (RStall)
+ return false;
+
+ // If either node prefers latency scheduling, sort them by height and
+ // latency first.
+ if (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency) {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency;
+ }
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+bool ilp_ls_rr_sort::operator()(const SUnit *left,
+ const SUnit *right) const {
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh)
+ return true;
+ else if (!LHigh && RHigh)
+ return false;
+ else if (!LHigh && !RHigh) {
+ // Low register pressure situation, schedule to maximize instruction level
+ // parallelism.
+ if (left->NumPreds > right->NumPreds)
+ return false;
+ else if (left->NumPreds < right->NumPreds)
+ return true;
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+template<class SF>
+bool
+RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// hasCopyToRegUse - Return true if SU has a value successor that is a
+/// CopyToReg node.
+static bool hasCopyToRegUse(const SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
+ return true;
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getFlaggedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const unsigned *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (; *SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+///      N
+///    / |
+///   /  |
+///  U  store
+///  |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///      N
+///      ||
+///      ||
+///     store
+///       |
+///       U
+///       |
+///      ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires
+ // additional support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't
+ // behave like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
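+///
+/// Illustrative example (not from the original source):
+///
+///   t1 = ADD t0, c     // two-address: result tied to t0
+///   t2 = MUL t0, t3    // plain use of t0
+///
+/// The ADD overwrites t0, so the artificial edge added below constrains the
+/// MUL to appear before the ADD in the final order, keeping t0's other use
+/// correct without inserting a copy.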
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+ // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if ((!canClobber(SuccSU, DUSU) ||
+ (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ }
+ }
+ }
+ }
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
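+/// (Note: the caller below passes Limit == 1 and compares the result
+/// against 1, i.e. it asks whether SU is the only unscheduled predecessor
+/// remaining among its successors' predecessors.)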
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, + unsigned Limit) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + const SUnit *SuccSU = I->getSUnit(); + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->getSUnit(); + if (!PredSU->isScheduled) + if (++Sum > Limit) + return Sum; + } + } + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); + bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; + unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus != RPriority+RBonus) + return LPriority+LBonus < RPriority+RBonus; + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + if (left->NumSuccsLeft != right->NumSuccsLeft) + return left->NumSuccsLeft > right->NumSuccsLeft; + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = + new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + TDRegReductionPriorityQueue *PQ = + new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + SrcRegReductionPriorityQueue *PQ = + new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine 
&TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetLowering *TLI = &IS->getTargetLowering(); + + HybridBURRPriorityQueue *PQ = + new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetLowering *TLI = &IS->getTargetLowering(); + + ILPBURRPriorityQueue *PQ = + new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 0000000..f1bf82a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,672 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "SDNodeDbgValue.h" +#include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(LoadsClustered, "Number of loads clustered together"); + +ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) + : ScheduleDAG(mf) { +} + +/// Run - perform scheduling. +/// +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos) { + DAG = dag; + ScheduleDAG::Run(bb, insertPos); +} + +/// NewSUnit - Creates a new SUnit and return a ptr to it. 
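+/// (Note: SUnit pointers are cached throughout the scheduler, so the SUnits
+/// vector must never reallocate while scheduling is in progress;
+/// BuildSchedUnits reserves capacity up front and the NDEBUG-only check in
+/// this function asserts that invariant.)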
+/// +SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = 0; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + SUnit *SU = &SUnits.back(); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + if (!N || + (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); + return SU; +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = NewSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->SchedulingPref = Old->SchedulingPref; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } + } +} + +static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, + SelectionDAG *DAG) { + SmallVector<EVT, 4> VTs; + SDNode *FlagDestNode = Flag.getNode(); + + // Don't add a flag from a node to itself. + if (FlagDestNode == N) return; + + // Don't add a flag to something which already has a flag. + if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + + if (AddFlag) + VTs.push_back(MVT::Flag); + + SmallVector<SDValue, 4> Ops; + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Ops.push_back(N->getOperand(I)); + + if (FlagDestNode) + Ops.push_back(Flag); + + SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); + MachineSDNode::mmo_iterator Begin = 0, End = 0; + MachineSDNode *MN = dyn_cast<MachineSDNode>(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + + DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + + // Reset the memory references + if (MN) + MN->setMemRefs(Begin, End); +} + +/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. +/// This function finds loads of the same base and different offsets. If the +/// offsets are not far apart (target specific), it add MVT::Flag inputs and +/// outputs to ensure they are scheduled together and in order. This +/// optimization may benefit some targets by improving cache locality. 
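+///
+/// Illustrative sketch (not from the original source): two loads
+///
+///   t1 = load [base + 0]
+///   t2 = load [base + 8]
+///
+/// are linked through an MVT::Flag value so the scheduler emits them back
+/// to back, in increasing address order.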
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if their addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
+ NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ AddFlags(Lead, SDValue(0, 0), true, DAG);
+
+ SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutFlag = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ AddFlags(Load, InFlag, OutFlag, DAG);
+
+ if (OutFlag)
+ InFlag = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ if (TID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating.
+ // This ensures that reallocation of the vector won't happen, so SUnit
+ // pointers won't get invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+ // See if anything is flagged to this node, if so, add them to flagged
+ // nodes. Nodes can have at most one flag input and one flag output. Flags
+ // are required to be the last operand and result of a node.
+
+ // Scan up to find flagged preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ }
+
+ // Scan down to find any flagged succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+ SDValue FlagVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Flag result.
+ bool HasFlagUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (FlagVal.isOperandOf(*UI)) {
+ HasFlagUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ break;
+ }
+ if (!HasFlagUse) break;
+ }
+
+ // If there are flag operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are flagged together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ ComputeLatency(NodeSUnit);
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = InstrEmitter::CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. + if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) + SU->hasPhysRegDefs = true; + } + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).getNode(); + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = &SUnits[OpN->getNodeId()]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. + + EVT OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + unsigned PhysReg = 0; + int Cost = 1; + // Determine if this is a physical register dependency. + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + assert((PhysReg == 0 || !isChain) && + "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0) + PhysReg = 0; + + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpLatency, PhysReg); + if (!isChain && !UnitLatencies) { + ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); + } + + SU->addPred(dep); + } + } + } +} + +/// BuildSchedGraph - Build the SUnit graph from the selection dag that we +/// are input. This SUnit graph is similar to the SelectionDAG, but +/// excludes nodes that aren't interesting to scheduling, and represents +/// flagged together nodes with a single SUnit. +void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); + // Populate the SUnits array. + BuildSchedUnits(); + // Compute all the scheduling dependencies between nodes. + AddSchedEdges(); +} + +void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + // Check to see if the scheduler cares about latencies. + if (ForceUnitLatencies()) { + SU->Latency = 1; + return; + } + + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) { + SU->Latency = 1; + return; + } + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + SU->Latency = 0; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + if (N->isMachineOpcode()) { + SU->Latency += InstrItins. + getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + } +} + +void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const{ + // Check to see if the scheduler cares about latencies. 
+ if (ForceUnitLatencies()) + return; + + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) + return; + + if (dep.getKind() != SDep::Data) + return; + + unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); + if (Def->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (DefIdx >= II.getNumDefs()) + return; + int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); + if (DefCycle < 0) + return; + int UseCycle = 1; + if (Use->isMachineOpcode()) { + const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); + UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + } + if (UseCycle >= 0) { + int Latency = DefCycle - UseCycle + 1; + if (Latency >= 0) + dep.setLatency(Latency); + } + } +} + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { + if (!SU->getNode()) { + dbgs() << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + dbgs() << "\n"; + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + dbgs() << " "; + FlaggedNodes.back()->dump(DAG); + dbgs() << "\n"; + FlaggedNodes.pop_back(); + } +} + +namespace { + struct OrderSorter { + bool operator()(const std::pair<unsigned, MachineInstr*> &A, + const std::pair<unsigned, MachineInstr*> &B) { + return A.first < B.first; + } + }; +} + +// ProcessSourceNode - Process nodes with source order numbers. These are added +// to a vector which EmitSchedule uses to determine how to insert dbg_value +// instructions in the right order. +static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, + InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, + SmallSet<unsigned, 8> &Seen) { + unsigned Order = DAG->GetOrdering(N); + if (!Order || !Seen.insert(Order)) + return; + + MachineBasicBlock *BB = Emitter.getBlock(); + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + // Did not insert any instruction. + Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + return; + } + + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + if (!N->getHasDebugValue()) + return; + // Opportunistically insert immediate dbg_value uses, i.e. those with source + // order number right after the N. + MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); + SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); + for (unsigned i = 0, e = DVs.size(); i != e; ++i) { + if (DVs[i]->isInvalidated()) + continue; + unsigned DVOrder = DVs[i]->getOrder(); + if (DVOrder == ++Order) { + MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); + if (DbgMI) { + Orders.push_back(std::make_pair(DVOrder, DbgMI)); + BB->insert(InsertPos, DbgMI); + } + DVs[i]->setIsInvalidated(); + } + } +} + + +/// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; + SmallSet<unsigned, 8> Seen; + bool HasDbg = DAG->hasDebugValues(); + + // If this is the first BB, emit byval parameter dbg_value's. 
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { + SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); + SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); + for (; PDI != PDE; ++PDI) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); + if (DbgMI) + BB->insert(InsertPos, DbgMI); + } + } + + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + SDNode *N = FlaggedNodes.back(); + Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap); + // Remember the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); + FlaggedNodes.pop_back(); + } + Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap); + // Remember the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, + Seen); + } + + // Insert all the dbg_values which have not already been inserted in source + // order sequence. + if (HasDbg) { + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); + + // Sort the source order instructions and use the order to insert debug + // values. + std::sort(Orders.begin(), Orders.end(), OrderSorter()); + + SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); + SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); + // Now emit the rest according to source order. + unsigned LastOrder = 0; + for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { + unsigned Order = Orders[i].first; + MachineInstr *MI = Orders[i].second; + // Insert all SDDbgValue's whose order(s) are before "Order". + if (!MI) + continue; +#ifndef NDEBUG + unsigned LastDIOrder = 0; +#endif + for (; DI != DE && + (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { +#ifndef NDEBUG + assert((*DI)->getOrder() >= LastDIOrder && + "SDDbgValue nodes must be in source order!"); + LastDIOrder = (*DI)->getOrder(); +#endif + if ((*DI)->isInvalidated()) + continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. + MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(llvm::next(Pos), DbgMI); + } + } + } + LastOrder = Order; + } + // Add trailing DbgValue's before the terminator. FIXME: May want to add + // some of them before one or more conditional branches? 
+ while (DI != DE) { + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); + if (!(*DI)->isInvalidated()) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) + InsertBB->insert(Pos, DbgMI); + } + ++DI; + } + } + + BB = Emitter.getBlock(); + InsertPos = Emitter.getInsertPos(); + return BB; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 0000000..842fc8c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,122 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGSDNODES_H +#define SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. + /// + /// Edges between SUnits are initially based on edges in the SelectionDAG, + /// and additional edges can be added by the schedulers as heuristics. + /// SDNodes such as Constants, Registers, and a few others that are not + /// interesting to schedulers are not allocated SUnits. + /// + /// SDNodes with MVT::Flag operands are grouped along with the flagged + /// nodes into a single SUnit so that they are scheduled together. + /// + /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output + /// edges. Physical register dependence information is not carried in + /// the DAG and must be handled explicitly by schedulers. + /// + class ScheduleDAGSDNodes : public ScheduleDAG { + public: + SelectionDAG *DAG; // DAG of the current basic block + + explicit ScheduleDAGSDNodes(MachineFunction &mf); + + virtual ~ScheduleDAGSDNodes() {} + + /// Run - perform scheduling. + /// + void Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos); + + /// isPassiveNode - Return true if the node is a non-scheduled leaf. + /// + static bool isPassiveNode(SDNode *Node) { + if (isa<ConstantSDNode>(Node)) return true; + if (isa<ConstantFPSDNode>(Node)) return true; + if (isa<RegisterSDNode>(Node)) return true; + if (isa<GlobalAddressSDNode>(Node)) return true; + if (isa<BasicBlockSDNode>(Node)) return true; + if (isa<FrameIndexSDNode>(Node)) return true; + if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<JumpTableSDNode>(Node)) return true; + if (isa<ExternalSymbolSDNode>(Node)) return true; + if (isa<BlockAddressSDNode>(Node)) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa<MDNodeSDNode>(Node)) return true; + return false; + } + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *NewSUnit(SDNode *N); + + /// Clone - Creates a clone of the specified SUnit. It does not copy the + /// predecessors / successors info nor the temporary scheduling states. + /// + SUnit *Clone(SUnit *N); + + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we + /// are input. 
This SUnit graph is similar to the SelectionDAG, but + /// excludes nodes that aren't interesting to scheduling, and represents + /// flagged together nodes with a single SUnit. + virtual void BuildSchedGraph(AliasAnalysis *AA); + + /// ComputeLatency - Compute node latency. + /// + virtual void ComputeLatency(SUnit *SU); + + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { } + + virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const; + + virtual MachineBasicBlock *EmitSchedule(); + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + virtual void dumpNode(const SUnit *SU) const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + + private: + /// ClusterNeighboringLoads - Cluster loads from "near" addresses into + /// combined SUnits. + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); + + /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. + void BuildSchedUnits(); + void AddSchedEdges(); + }; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 0000000..ad06ebd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,6444 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "SDNodeOrdering.h" +#include "SDNodeDbgValue.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + +SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(EVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // PPC long double cannot be converted to any other type. + if (VT == MVT::ppcf128 || + &Val.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + + // convert modifies in place, so make a copy. 
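//===-- Illustration: why exact FP checks compare bits --------------------===//
// A standalone sketch, not from the LLVM sources; bitwiseEqual() and
// fitsInFloat() are invented names. operator== calls -0.0 and 0.0 equal, so an
// exact check must compare representations, and "does this double survive
// conversion?" is answered by converting and testing for lost information,
// just as the convert()/losesInfo pair does below (NaN would need extra care,
// since it never compares equal to itself).
#include <cassert>
#include <cstring>

static bool bitwiseEqual(double A, double B) {
  return std::memcmp(&A, &B, sizeof(double)) == 0;
}

static bool fitsInFloat(double V) {
  float Narrow = static_cast<float>(V);    // may round
  return static_cast<double>(Narrow) == V; // did anything change?
}

int main() {
  assert(0.0 == -0.0);              // value comparison says "equal"...
  assert(!bitwiseEqual(0.0, -0.0)); // ...but the bit patterns differ
  assert(fitsInFloat(0.5));         // exactly representable as f32
  assert(!fitsInFloat(0.1));        // 0.1 needs more precision than f32 has
  return 0;
}
//===----------------------------------------------------------------------===//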
+ APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue NotZero = N->getOperand(i); + if (isa<ConstantSDNode>(NotZero)) { + if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + return false; + } else if (isa<ConstantFPSDNode>(NotZero)) { + if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). + bitcastToAPInt().isAllOnesValue()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. + SDValue Zero = N->getOperand(i); + if (isa<ConstantSDNode>(Zero)) { + if (!cast<ConstantSDNode>(Zero)->isNullValue()) + return false; + } else if (isa<ConstantFPSDNode>(Zero)) { + if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero()) + return false; + } else + return false; + + // Okay, we have at least one 0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != Zero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +/// isScalarToVector - Return true if the specified node is a +/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low +/// element is not an undef. +bool ISD::isScalarToVector(const SDNode *N) { + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) + return true; + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + if (N->getOperand(0).getOpcode() == ISD::UNDEF) + return false; + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 1; i < NumElems; ++i) { + SDValue V = N->getOperand(i); + if (V.getOpcode() != ISD::UNDEF) + return false; + } + return true; +} + +/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) +/// when given the operation for (X op Y). 
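//===-- Illustration: the condition-code bit tricks -----------------------===//
// A standalone sketch, not from the LLVM sources. It assumes the ISD::CondCode
// numbering, where bit 0 = E, bit 1 = G, bit 2 = L, bit 3 = U(nordered), and
// bit 4 marks the "don't care about ordering" (integer) codes; the constants
// below spell out a few of those enum values.
#include <cassert>

static unsigned swapLG(unsigned Op) { // getSetCCSwappedOperands, in essence
  unsigned OldL = (Op >> 2) & 1;
  unsigned OldG = (Op >> 1) & 1;
  return (Op & ~6u) | (OldL << 1) | (OldG << 2);
}

int main() {
  const unsigned SETUGT = 10, SETULT = 12, SETEQ = 17, SETGT = 18,
                 SETGE = 19, SETLT = 20, SETLE = 21, SETNE = 22;
  assert(swapLG(SETLT) == SETGT);   // (X < Y) becomes (Y > X)
  assert(swapLG(SETULT) == SETUGT); // unsigned codes swap the same way
  assert((SETLT | SETGT) == SETNE); // OR-ing comparisons unions their bits
  assert((SETLE & SETGE) == SETEQ); // AND-ing intersects them
  return 0;
}
//===----------------------------------------------------------------------===//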
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));       // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
+
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;  // Don't let N and U bits get set.
+
+  return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: llvm_unreachable("Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE: return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE: return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                       bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed integer setcc with an unsigned integer setcc.
+    return ISD::SETCC_INVALID;
+
+  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.
+
+  // If the N and U bits get set then the resultant comparison DOES suddenly
+  // care about orderedness, and is true when ordered.
+  if (Op > ISD::SETTRUE2)
+    Op &= ~16;     // Clear the U bit if the N bit is set.
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
+    Op = ISD::SETNE;
+
+  return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                        bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed setcc with an unsigned setcc.
+    return ISD::SETCC_INVALID;
+
+  // Combine all of the condition bits.
+  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+  // Canonicalize illegal integer setcc's.
+ if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDValue *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDUse *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, + unsigned short OpC, SDVTList VTList, + const SDValue *OpList, unsigned N) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList, N); +} + +/// AddNodeIDCustom - If this is an SDNode with special info, add this info to +/// the NodeID data. +static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + case ISD::TargetExternalSymbol: + case ISD::ExternalSymbol: + llvm_unreachable("Should only be used on nodes with operands"); + default: break; // Normal nodes don't need extra info. 
+  case ISD::TargetConstant:
+  case ISD::Constant:
+    ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+    break;
+  case ISD::TargetConstantFP:
+  case ISD::ConstantFP: {
+    ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+    break;
+  }
+  case ISD::TargetGlobalAddress:
+  case ISD::GlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::GlobalTLSAddress: {
+    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+    ID.AddPointer(GA->getGlobal());
+    ID.AddInteger(GA->getOffset());
+    ID.AddInteger(GA->getTargetFlags());
+    break;
+  }
+  case ISD::BasicBlock:
+    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+    break;
+  case ISD::Register:
+    ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+    break;
+
+  case ISD::SRCVALUE:
+    ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+    break;
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+    break;
+  case ISD::JumpTable:
+  case ISD::TargetJumpTable:
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+    break;
+  case ISD::ConstantPool:
+  case ISD::TargetConstantPool: {
+    const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getOffset());
+    if (CP->isMachineConstantPoolEntry())
+      CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+    else
+      ID.AddPointer(CP->getConstVal());
+    ID.AddInteger(CP->getTargetFlags());
+    break;
+  }
+  case ISD::LOAD: {
+    const LoadSDNode *LD = cast<LoadSDNode>(N);
+    ID.AddInteger(LD->getMemoryVT().getRawBits());
+    ID.AddInteger(LD->getRawSubclassData());
+    break;
+  }
+  case ISD::STORE: {
+    const StoreSDNode *ST = cast<StoreSDNode>(N);
+    ID.AddInteger(ST->getMemoryVT().getRawBits());
+    ID.AddInteger(ST->getRawSubclassData());
+    break;
+  }
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: {
+    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+    ID.AddInteger(AT->getMemoryVT().getRawBits());
+    ID.AddInteger(AT->getRawSubclassData());
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+    for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+         i != e; ++i)
+      ID.AddInteger(SVN->getMaskElt(i));
+    break;
+  }
+  case ISD::TargetBlockAddress:
+  case ISD::BlockAddress: {
+    ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+    ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+    break;
+  }
+  } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+                     bool isNonTemporal) {
+  assert((ConvType & 3) == ConvType &&
+         "ConvType may not require more than 2 bits!");
+  assert((AM & 7) == AM &&
+         "AM may not require more than 3 bits!");
+  return ConvType |
+         (AM << 2) |
+         (isVolatile << 5) |
+         (isNonTemporal << 6);
+}
+
+//===----------------------------------------------------------------------===//
+//                              SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+  if (N->getValueType(0) == MVT::Flag)
+    return true; // Never CSE anything that produces a flag.
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::HANDLENODE:
+  case ISD::EH_LABEL:
+    return true;   // Never CSE these nodes.
+  }
+
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return true; // Never CSE anything that produces a flag.
+
+  return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty())
+      DeadNodes.push_back(I);
+
+  RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+                                   DAGUpdateListener *UpdateListener) {
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.pop_back_val();
+
+    if (UpdateListener)
+      UpdateListener->NodeDeleted(N, 0);
+
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list. This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+      SDUse &Use = *I++;
+      SDNode *Operand = Use.getNode();
+      Use.set(SDValue());
+
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+
+    DeallocateNode(N);
+  }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+  SmallVector<SDNode*, 16> DeadNodes(1, N);
+  RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+  assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
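//===-- Illustration: worklist deletion of dead nodes ---------------------===//
// A standalone sketch, not from the LLVM sources; Node and removeDead() are
// invented names. Deleting a node releases its operand edges, and any operand
// that loses its last use becomes dead too and joins the worklist, exactly
// the cascade RemoveDeadNodes relies on.
#include <cstddef>
#include <vector>

struct Node {
  std::vector<Node *> Ops; // the nodes this node uses
  unsigned NumUses;        // how many nodes use this one
  bool Deleted;
};

static void removeDead(std::vector<Node *> Worklist) {
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    N->Deleted = true;
    for (std::size_t i = 0; i != N->Ops.size(); ++i)
      if (--N->Ops[i]->NumUses == 0) // dropped the last use of this operand
        Worklist.push_back(N->Ops[i]);
    N->Ops.clear();
  }
}
//===----------------------------------------------------------------------===//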
+  // Drop all of the operands and decrement used nodes' use counts.
+  N->DropOperands();
+
+  DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+  if (N->OperandsNeedDelete)
+    delete[] N->OperandList;
+
+  // Set the opcode to DELETED_NODE to help catch bugs when node
+  // memory is reallocated.
+  N->NodeType = ISD::DELETED_NODE;
+
+  NodeAllocator.Deallocate(AllNodes.remove(N));
+
+  // Remove the ordering of this node.
+  Ordering->remove(N);
+
+  // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+  SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+  for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+    DbgVals[i]->setIsInvalidated();
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE maps that
+/// correspond to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+  bool Erased = false;
+  switch (N->getOpcode()) {
+  case ISD::EntryToken:
+    llvm_unreachable("EntryToken should not be in CSEMaps!");
+    return false;
+  case ISD::HANDLENODE: return false;  // noop.
+  case ISD::CONDCODE:
+    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+           "Cond code doesn't exist!");
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    break;
+  case ISD::ExternalSymbol:
+    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::TargetExternalSymbol: {
+    ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+    Erased = TargetExternalSymbols.erase(
+               std::pair<std::string,unsigned char>(ESN->getSymbol(),
+                                                    ESN->getTargetFlags()));
+    break;
+  }
+  case ISD::VALUETYPE: {
+    EVT VT = cast<VTSDNode>(N)->getVT();
+    if (VT.isExtended()) {
+      Erased = ExtendedValueTypeNodes.erase(VT);
+    } else {
+      Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+      ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+    }
+    break;
+  }
+  default:
+    // Remove it from the CSE Map.
+    Erased = CSEMap.RemoveNode(N);
+    break;
+  }
+#ifndef NDEBUG
+  // Verify that the node was actually in one of the CSE maps, unless it has a
+  // flag result (which cannot be CSE'd) or is one of the special cases that are
+  // not subject to CSE.
+  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+      !N->isMachineOpcode() && !doNotCSE(N)) {
+    N->dump(this);
+    dbgs() << "\n";
+    llvm_unreachable("Node is not in map!");
+  }
+#endif
+  return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+                                       DAGUpdateListener *UpdateListener) {
+  // For node types that aren't CSE'd, just act as if no identical node
+  // already exists.
+  if (!doNotCSE(N)) {
+    SDNode *Existing = CSEMap.GetOrInsertNode(N);
+    if (Existing != N) {
+      // If there was already an existing matching node, use ReplaceAllUsesWith
+      // to replace the dead one with the existing one. This can cause
+      // recursive merging of other unrelated nodes down the line.
+      ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+      // N is now dead. Inform the listener if it exists and delete it.
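//===-- Illustration: structural CSE keys ---------------------------------===//
// A standalone sketch, not from the LLVM sources; NodeKey and SimpleCSEMap are
// invented names. A node's identity is its opcode plus its operands, so asking
// the map for the same combination twice hands back the node created the first
// time, which is the behavior GetOrInsertNode provides above.
#include <map>
#include <utility>
#include <vector>

typedef std::pair<unsigned, std::vector<unsigned> > NodeKey;

struct SimpleCSEMap {
  std::map<NodeKey, unsigned> Table; // key -> node id
  unsigned NextId;
  SimpleCSEMap() : NextId(0) {}

  unsigned getOrInsert(unsigned Opcode, const std::vector<unsigned> &Ops) {
    NodeKey Key(Opcode, Ops);
    std::map<NodeKey, unsigned>::const_iterator I = Table.find(Key);
    if (I != Table.end())
      return I->second; // a structurally identical node already exists
    return Table[Key] = NextId++;
  }
};
//===----------------------------------------------------------------------===//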
+ if (UpdateListener) + UpdateListener->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform a listener if + // it exists. + if (UpdateListener) + UpdateListener->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + +/// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
+void SelectionDAG::VerifyNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + break; + } + } +} + +/// getEVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { + const Type *Ty = VT == MVT::iPTR ? + PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); + + return TLI.getTargetData()->getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(const TargetMachine &tm) + : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), + EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), Ordering(0) { + AllNodes.push_back(&EntryNode); + Ordering = new SDNodeOrdering(); + DbgInfo = new SDDbgInfo(); +} + +void SelectionDAG::init(MachineFunction &mf) { + MF = &mf; + Context = &mf.getFunction()->getContext(); +} + +SelectionDAG::~SelectionDAG() { + allnodes_clear(); + delete Ordering; + delete DbgInfo; +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(AllNodes.begin()); +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(0)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(0)); + + EntryNode.UseList = 0; + AllNodes.push_back(&EntryNode); + Root = getEntryNode(); + Ordering->clear(); + DbgInfo->clear(); +} + +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? 
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); + if (Op.getValueType() == VT) return Op; + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). +/// +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { + EVT EltVT = VT.getScalarType(); + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { + return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + EVT EltVT = VT.getScalarType(); + assert(Val.getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&Val); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { + return getConstant(Val, TLI.getPointerTy(), isTarget); +} + + +SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + EVT EltVT = VT.getScalarType(); + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&V); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + // FIXME DebugLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { + EVT EltVT = VT.getScalarType(); + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), VT, isTarget); + else if (EltVT==MVT::f64) + return getConstantFP(APFloat(Val), VT, isTarget); + else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + bool ignored; + APFloat apf = APFloat(Val); + apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &ignored); + return getConstantFP(apf, VT, isTarget); + } else { + assert(0 && "Unsupported type in getConstantFP"); + return SDValue(); + } +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, + EVT VT, int64_t Offset, + bool isTargetGA, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTargetGA) && + "Cannot set target flags on target-independent globals"); + + // Truncate (with sign-extension) the offset value to the pointer size. + EVT PTy = TLI.getPointerTy(); + unsigned BitWidth = PTy.getSizeInBits(); + if (BitWidth < 64) + Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + } + + unsigned Opc; + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, + Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent jump tables"); + unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->AddSelectionDAGCSEId(ID); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MBB); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(EVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); + + SDNode *&N = VT.isExtended() ? 
+    ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+  if (N) return SDValue(N, 0);
+  N = new (NodeAllocator) VTSDNode(VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+                                              unsigned char TargetFlags) {
+  SDNode *&N =
+    TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+                                                               TargetFlags)];
+  if (N) return SDValue(N, 0);
+  N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+    CondCodeNodes[Cond] = N;
+    AllNodes.push_back(N);
+  }
+
+  return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+  std::swap(N1, N2);
+  int NElts = M.size();
+  for (int i = 0; i != NElts; ++i) {
+    if (M[i] >= NElts)
+      M[i] -= NElts;
+    else if (M[i] >= 0)
+      M[i] += NElts;
+  }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+                                       SDValue N2, const int *Mask) {
+  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+         && "Vector Shuffle VTs must have the same element type");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+    return getUNDEF(VT);
+
+  // Validate that all indices in Mask are within the range of the elements
+  // input to the shuffle.
+  unsigned NElts = VT.getVectorNumElements();
+  SmallVector<int, 8> MaskVec;
+  for (unsigned i = 0; i != NElts; ++i) {
+    assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+    MaskVec.push_back(Mask[i]);
+  }
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N1 == N2) {
+    N2 = getUNDEF(VT);
+    for (unsigned i = 0; i != NElts; ++i)
+      if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+  }
+
+  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
+  if (N1.getOpcode() == ISD::UNDEF)
+    commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize all indices into lhs -> shuffle lhs, undef
+  // Canonicalize all indices into rhs -> shuffle rhs, undef
+  bool AllLHS = true, AllRHS = true;
+  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= (int)NElts) {
+      if (N2Undef)
+        MaskVec[i] = -1;
+      else
+        AllLHS = false;
+    } else if (MaskVec[i] >= 0) {
+      AllRHS = false;
+    }
+  }
+  if (AllLHS && AllRHS)
+    return getUNDEF(VT);
+  if (AllLHS && !N2Undef)
+    N2 = getUNDEF(VT);
+  if (AllRHS) {
+    N1 = getUNDEF(VT);
+    commuteShuffle(N1, N2, MaskVec);
+  }
+
+  // If this is an identity shuffle, or if everything shuffles to undef,
+  // return that node.
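//===-- Illustration: commuting a shuffle mask ----------------------------===//
// A standalone sketch, not from the LLVM sources; commuteMask() is an invented
// name mirroring commuteShuffle above. Swapping the two input vectors means
// mask entries 0..N-1 and N..2N-1 trade ranges, while -1 ("undef") entries are
// left alone.
#include <cassert>
#include <vector>

static void commuteMask(std::vector<int> &M) {
  int NElts = (int)M.size();
  for (int i = 0; i != NElts; ++i) {
    if (M[i] >= NElts)
      M[i] -= NElts;    // was an element of the second vector
    else if (M[i] >= 0)
      M[i] += NElts;    // was an element of the first vector
  }
}

int main() {
  int Init[4] = { 0, 5, -1, 2 };
  std::vector<int> M(Init, Init + 4);
  commuteMask(M);
  assert(M[0] == 4 && M[1] == 1 && M[2] == -1 && M[3] == 6);
  return 0;
}
//===----------------------------------------------------------------------===//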
+ bool AllUndef = true; + bool Identity = true; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] >= 0) AllUndef = false; + } + if (Identity && NElts == N1.getValueType().getVectorNumElements()) + return N1; + if (AllUndef) + return getUNDEF(VT); + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + for (unsigned i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. + int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = + new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, + SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code) { + // If the src and dest types are the same and the conversion is between + // integer types of the same sign or two floats, no conversion is necessary. + if (DTy == STy && + (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) + return Val; + + FoldingSetNodeID ID; + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, + Code); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + ID.AddInteger(RegNo); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); + ID.AddPointer(Label); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + bool isTarget, + unsigned char TargetFlags) { + unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(BA); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || V->getType()->isPointerTy()) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(V); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) SrcValueSDNode(V); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MD); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. +SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { + EVT OpTy = Op.getValueType(); + MVT ShTy = TLI.getShiftAmountTy(); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSize(); + const Type *Ty = VT.getTypeForEVT(*getContext()); + unsigned StackAlign = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSizeInBits(), + VT2.getStoreSizeInBits())/8; + const Type *Ty1 = VT1.getTypeForEVT(*getContext()); + const Type *Ty2 = VT2.getTypeForEVT(*getContext()); + const TargetData *TD = TLI.getTargetData(); + unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), + TD->getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + // These setcc operations always fold. 
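//===-- Illustration: NaN makes comparisons "unordered" -------------------===//
// A standalone sketch, not from the LLVM sources. A NaN operand fails every
// ordered comparison, which is what the cmpUnordered handling in the FP branch
// below encodes: SETO folds to 0, SETUO folds to 1, and so on.
#include <cassert>
#include <limits>

int main() {
  double NaN = std::numeric_limits<double>::quiet_NaN();
  assert(NaN != NaN);                   // only NaN compares unequal to itself
  assert(!(NaN < 1.0) && !(NaN > 1.0)); // ordered compares involving NaN fail
  assert(!(NaN == NaN));                // so SETOEQ folds to 0, SETUNE to 1
  return 0;
}
//===----------------------------------------------------------------------===//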
+ switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getConstant(1, VT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); + case ISD::SETULE: return getConstant(C1.ule(C2), VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), VT); + case ISD::SETLT: return getConstant(C1.slt(C2), VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), VT); + case ISD::SETLE: return getConstant(C1.sle(C2), VT); + case ISD::SETGE: return getConstant(C1.sge(C2), VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + // No compile time operations on this type yet. + if (N1C->getValueType(0) == MVT::ppcf128) + return SDValue(); + + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + } + + // Could not fold it. 
+ return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + // This predicate is not safe for vector operations. + if (Op.getValueType().isVector()) + return false; + + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) const { + unsigned BitWidth = Mask.getBitWidth(); + assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && + "Mask size mismatches value type size!"); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. + + APInt KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, + KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + case ISD::OR: + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, + KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case ISD::XOR: { + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. 
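//===-- Illustration: known-bits transfer for AND and XOR -----------------===//
// A standalone sketch, not from the LLVM sources; KB, kbAnd() and kbXor() are
// invented names. Zero has a 1 wherever the value is known 0, One wherever it
// is known 1, and (Zero & One) == 0 always holds.
#include <cassert>

struct KB { unsigned Zero, One; };

static KB kbAnd(KB A, KB B) {
  KB R;
  R.One  = A.One & B.One;   // result bit is 1 only if 1 in both inputs
  R.Zero = A.Zero | B.Zero; // result bit is 0 if 0 in either input
  return R;
}

static KB kbXor(KB A, KB B) {
  KB R;
  R.Zero = (A.Zero & B.Zero) | (A.One & B.One); // equal known bits give 0
  R.One  = (A.Zero & B.One) | (A.One & B.Zero); // differing known bits give 1
  return R;
}

int main() {
  KB Unknown = { 0, 0 };
  KB Const0xF0 = { 0x0F, 0xF0 };        // the constant 0xF0, 8-bit view
  KB R = kbAnd(Unknown, Const0xF0);
  assert(R.Zero == 0x0F && R.One == 0); // low nibble of (x & 0xF0) is 0
  KB X = kbXor(Const0xF0, Const0xF0);
  assert(X.Zero == 0xFF && X.One == 0); // c ^ c is known to be 0
  return 0;
}
//===----------------------------------------------------------------------===//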
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are known to be set if set in only one of the LHS,
+    // RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::MUL: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clear();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+                              KnownZero2.countLeadingOnes(),
+                              BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case ISD::UDIV: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be less than the denominator.
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+    KnownOne2.clear();
+    KnownZero2.clear();
+    ComputeMaskedBits(Op.getOperand(1),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+    if (RHSUnknownLeadingOnes != BitWidth)
+      LeadZ = std::min(BitWidth,
+                       LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+    KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      return;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
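//===-- Illustration: the alignment fact behind the MUL rule --------------===//
// A standalone sketch, not from the LLVM sources; countTrailingZeros() is an
// invented helper. A product has at least as many trailing zero bits as its
// factors have combined, which is what makes the multiply case above useful
// for alignment computations.
#include <cassert>

static unsigned countTrailingZeros(unsigned V) {
  if (V == 0)
    return 32;
  unsigned N = 0;
  while ((V & 1) == 0) {
    V >>= 1;
    ++N;
  }
  return N;
}

int main() {
  unsigned A = 8, B = 12; // 3 and 2 trailing zeros, respectively
  assert(countTrailingZeros(A * B) >=
         countTrailingZeros(A) + countTrailingZeros(B));
  assert(countTrailingZeros(96) == 5); // 8 * 12 = 96 = 0b1100000
  return 0;
}
//===----------------------------------------------------------------------===//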
+ if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShAmt; + KnownOne <<= ShAmt; + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + APInt InDemandedMask = (Mask << ShAmt); + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + if (HighBits.getBoolValue()) + InDemandedMask |= APInt::getSignBit(BitWidth); + + ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bits. + APInt SignBit = APInt::getSignBit(BitWidth); + SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. + + if (KnownZero.intersects(SignBit)) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne.intersects(SignBit)) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getScalarType().getSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + + APInt InSignBit = APInt::getSignBit(EBits); + APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignBit.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
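+    // (Worked example, added for illustration: for a SIGN_EXTEND_INREG from
+    // i8 inside an i32, a known-zero bit 7 makes bits 8-31 known zero, as if
+    // the node were a zero extension; a known-one bit 7 makes them known one.)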
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownOne.clear(); + return; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + EVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + } + return; + } + case ISD::ZERO_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + return; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InSignBit = APInt::getSignBit(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. Temporarily set this bit in the mask for our callee. + if (NewBits.getBoolValue()) + InMask |= InSignBit; + + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + + // Note if the sign bit is known to be zero or one. + bool SignBitKnownZero = KnownZero.isNegative(); + bool SignBitKnownOne = KnownOne.isNegative(); + assert(!(SignBitKnownZero && SignBitKnownOne) && + "Sign bit can't be known to be both zero and one!"); + + // If the sign bit wasn't actually demanded by our caller, we don't + // want it set in the KnownZero and KnownOne result values. Reset the + // mask and reapply it to the result values. + InMask = Mask; + InMask.trunc(InBits); + KnownZero &= InMask; + KnownOne &= InMask; + + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero or one, the top bits match. 
+ if (SignBitKnownZero) + KnownZero |= NewBits; + else if (SignBitKnownOne) + KnownOne |= NewBits; + return; + } + case ISD::ANY_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + return; + } + case ISD::TRUNCATE: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InMask = Mask; + InMask.zext(InBits); + KnownZero.zext(InBits); + KnownOne.zext(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + break; + } + case ISD::AssertZext: { + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= (~InMask) & Mask; + return; + } + case ISD::FGETSIGN: + // All bits are zero except the low bit. + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through + case ISD::ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + + // The low bits of the first operand are unchanged by the srem. 
+ KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + return; + } + default: + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); + } + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + EVT VT = Op.getValueType(); + assert(VT.isInteger() && "Invalid VT!"); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (Depth == 6) + return 1; // Limit search depth. + + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::Constant: { + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + // If negative, return # leading ones. + if (Val.isNegative()) + return Val.countLeadingOnes(); + + // Return # leading zeros. + return Val.countLeadingZeros(); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. 
+ Tmp = + cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getZExtValue() >= VTBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getBooleanContents() == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getZExtValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. 
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); +} + +bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { + // If we're told that NaNs won't happen, assume they won't. + if (NoNaNsFPMath) + return true; + + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->getValueAPF().isNaN(); + + // TODO: Recognize more cases here. + + return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + // If the value is a constant, we can obviously see if it is a zero or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->isZero(); + + // TODO: Recognize more cases here. + + return false; +} + +bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { + // Check the obvious case. 
+  if (A == B) return true;
+
+  // For negative and positive zero.
+  if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+    if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+      if (CA->isZero() && CB->isZero()) return true;
+
+  // Otherwise they may not be equal.
+  return false;
+}
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+  if (!GA) return false;
+  if (GA->getOffset() != 0) return false;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+  if (!GV) return false;
+  return MF->getMMI().hasDebugInfo();
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+  CSEMap.InsertNode(N, IP);
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              EVT VT, SDValue Operand) {
+  // Constant fold unary operations with an integer constant operand.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+    const APInt &Val = C->getAPIntValue();
+    switch (Opcode) {
+    default: break;
+    case ISD::SIGN_EXTEND:
+      return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::TRUNCATE:
+      return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
+    case ISD::UINT_TO_FP:
+    case ISD::SINT_TO_FP: {
+      const uint64_t zero[] = {0, 0};
+      // No compile time operations on ppcf128.
+      if (VT == MVT::ppcf128) break;
+      APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+      (void)apf.convertFromAPInt(Val,
+                                 Opcode==ISD::SINT_TO_FP,
+                                 APFloat::rmNearestTiesToEven);
+      return getConstantFP(apf, VT);
+    }
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+        return getConstantFP(Val.bitsToFloat(), VT);
+      else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+        return getConstantFP(Val.bitsToDouble(), VT);
+      break;
+    case ISD::BSWAP:
+      return getConstant(Val.byteSwap(), VT);
+    case ISD::CTPOP:
+      return getConstant(Val.countPopulation(), VT);
+    case ISD::CTLZ:
+      return getConstant(Val.countLeadingZeros(), VT);
+    case ISD::CTTZ:
+      return getConstant(Val.countTrailingZeros(), VT);
+    }
+  }
+
+  // Constant fold unary operations with a floating point constant operand.
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+    APFloat V = C->getValueAPF();    // make copy
+    if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+      switch (Opcode) {
+      case ISD::FNEG:
+        V.changeSign();
+        return getConstantFP(V, VT);
+      case ISD::FABS:
+        V.clearSign();
+        return getConstantFP(V, VT);
+      case ISD::FP_ROUND:
+      case ISD::FP_EXTEND: {
+        bool ignored;
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)V.convert(*EVTToAPFloatSemantics(VT),
+                        APFloat::rmNearestTiesToEven, &ignored);
+        return getConstantFP(V, VT);
+      }
+      case ISD::FP_TO_SINT:
+      case ISD::FP_TO_UINT: {
+        integerPart x[2];
+        bool ignored;
+        assert(integerPartWidth >= 64);
+        // FIXME need to be more flexible about rounding mode.
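+        // (Illustration, added; not in the original: with rmTowardZero the
+        // constant 3.7 folds to 3, while an out-of-range value makes
+        // convertToInteger report opInvalidOp and the node is left unfolded.)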
+        APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+                                Opcode==ISD::FP_TO_SINT,
+                                APFloat::rmTowardZero, &ignored);
+        if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual
+          break;
+        APInt api(VT.getSizeInBits(), 2, x);
+        return getConstant(api, VT);
+      }
+      case ISD::BIT_CONVERT:
+        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+        else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+          return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+        break;
+      }
+    }
+  }
+
+  unsigned OpOpcode = Operand.getNode()->getOpcode();
+  switch (Opcode) {
+  case ISD::TokenFactor:
+  case ISD::MERGE_VALUES:
+  case ISD::CONCAT_VECTORS:
+    return Operand;         // Factor, merge or concat of one node?  No need.
+  case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+  case ISD::FP_EXTEND:
+    assert(VT.isFloatingPoint() &&
+           Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+    if (Operand.getValueType() == VT) return Operand;  // noop conversion.
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (Operand.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SIGN_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid SIGN_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid sext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ZERO_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ZERO_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid zext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
+      return getNode(ISD::ZERO_EXTEND, DL, VT,
+                     Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ANY_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ANY_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid anyext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+
+    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+        OpOpcode == ISD::ANY_EXTEND)
+      // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+
+    // (ext (trunc x)) -> x
+    if (OpOpcode == ISD::TRUNCATE) {
+      SDValue OpOp = Operand.getNode()->getOperand(0);
+      if (OpOp.getValueType() == VT)
+        return OpOp;
+    }
+    break;
+  case ISD::TRUNCATE:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid TRUNCATE!");
+    if (Operand.getValueType() == VT) return Operand;   // noop truncate
+    assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+           "Invalid truncate node, src < dst!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::TRUNCATE)
+      return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+             OpOpcode == ISD::ANY_EXTEND) {
+      // If the source is smaller than the dest, we still need an extend.
+      if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+            .bitsLT(VT.getScalarType()))
+        return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+      else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+        return getNode(ISD::TRUNCATE, DL, VT,
+                       Operand.getNode()->getOperand(0));
+      else
+        return Operand.getNode()->getOperand(0);
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    // Basic sanity checking.
+    assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+           && "Cannot BIT_CONVERT between types of different sizes!");
+    if (VT == Operand.getValueType()) return Operand;  // noop conversion.
+    if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    assert(VT.isVector() && !Operand.getValueType().isVector() &&
+           (VT.getVectorElementType() == Operand.getValueType() ||
+            (VT.getVectorElementType().isInteger() &&
+             Operand.getValueType().isInteger() &&
+             VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+           "Illegal SCALAR_TO_VECTOR node!");
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
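+    // (Note, added for clarity: the fold below fires only when the extracted
+    // index is the constant 0 and V already has the result vector type;
+    // otherwise the node is built normally.)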
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if (UnsafeFPMath && OpOpcode == ISD::FSUB) + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0)); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getNode()->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDValue Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops, 1); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, + EVT VT, + ConstantSDNode *Cst1, + ConstantSDNode *Cst2) { + const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: return getConstant(C1 + C2, VT); + case ISD::SUB: return getConstant(C1 - C2, VT); + case ISD::MUL: return getConstant(C1 * C2, VT); + case ISD::UDIV: + if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); + break; + case ISD::UREM: + if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); + break; + case ISD::SDIV: + if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); + break; + case ISD::SREM: + if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); + break; + case ISD::AND: return getConstant(C1 & C2, VT); + case ISD::OR: return getConstant(C1 | C2, VT); + case ISD::XOR: return getConstant(C1 ^ C2, VT); + case ISD::SHL: return getConstant(C1 << C2, VT); + case ISD::SRL: return getConstant(C1.lshr(C2), VT); + case ISD::SRA: return getConstant(C1.ashr(C2), VT); + case ISD::ROTL: return getConstant(C1.rotl(C2), VT); + case ISD::ROTR: return getConstant(C1.rotr(C2), VT); + default: break; + } + + return SDValue(); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. 
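+    // (Illustration, added; not in the original: concatenating the v2i32
+    // nodes (build_vector a, b) and (build_vector c, d) yields the single
+    // v4i32 node (build_vector a, b, c, d).)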
+ if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::AND: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->isNullValue()) + return N2; + if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + return N1; + break; + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so + // it's worth handling here. + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::UDIV: + case ISD::UREM: + case ISD::MULHU: + case ISD::MULHS: + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) { + if (Opcode == ISD::FADD) { + // 0+x --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) + if (CFP->getValueAPF().isZero()) + return N2; + // x+0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } else if (Opcode == ISD::FSUB) { + // x-0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } + } + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. + assert(N1.getValueType() == VT && + N1.getValueType().isFloatingPoint() && + N2.getValueType().isFloatingPoint() && + "Invalid FCOPYSIGN!"); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + assert(VT == N1.getValueType() && + "Shift operators return type must be the same as their first arg"); + assert(VT.isInteger() && N2.getValueType().isInteger() && + "Shifts only work on integers"); + + // Always fold shifts of i1 values so the code generator doesn't need to + // handle them. Since we know the size of the shift has to be less than the + // size of the value, the shift/rotate count is guaranteed to be zero. 
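+    // (Illustration, added; not in the original: a well-formed (shl i1 X, C)
+    // requires C < 1, i.e. C == 0, so the shift is the identity and X is
+    // returned below.)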
+ if (VT == MVT::i1) + return N1; + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::FP_ROUND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT.isVector() == VT.isVector() && + "FP_ROUND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in FP_ROUND_INREG"); + assert(EVT.bitsLE(VT) && "Not rounding down!"); + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + } + case ISD::FP_ROUND: + assert(VT.isFloatingPoint() && + N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && + isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + if (N1.getValueType() == VT) return N1; // noop conversion. + break; + case ISD::AssertSext: + case ISD::AssertZext: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(!EVT.isVector() && + "AssertSExt/AssertZExt type should be the vector element type " + "rather than the vector type!"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (VT == EVT) return N1; // noop assertion. + break; + } + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.isVector() == VT.isVector() && + "SIGN_EXTEND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in SIGN_EXTEND_INREG"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + if (N1C) { + APInt Val = N1C->getAPIntValue(); + unsigned FromBits = EVT.getScalarType().getSizeInBits(); + Val <<= Val.getBitWidth()-FromBits; + Val = Val.ashr(Val.getBitWidth()-FromBits); + return getConstant(Val, VT); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + EVT VEltTy = N1.getValueType().getVectorElementType(); + if (Elt.getValueType() != VEltTy) { + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); + } + if (VT != VEltTy) { + // If the vector element type is not legal, the EXTRACT_VECTOR_ELT + // result is implicitly extended. 
+        Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
+      }
+      return Elt;
+    }
+
+    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+    // operations are lowered to scalars.
+    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element else
+      // if the indices are known different, extract the element from
+      // the original vector.
+      SDValue N1Op2 = N1.getOperand(2);
+      ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+      if (N1Op2C && N2C) {
+        if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+          if (VT == N1.getOperand(1).getValueType())
+            return N1.getOperand(1);
+          else
+            return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+        }
+
+        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+      }
+    }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    assert(!N1.getValueType().isVector() && !VT.isVector() &&
+           (N1.getValueType().isInteger() == VT.isInteger()) &&
+           "Wrong types for EXTRACT_ELEMENT!");
+
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+    // 64-bit integers into 32-bit parts.  Instead of building the extract of
+    // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getZExtValue());
+
+    // EXTRACT_ELEMENT of a constant int is also very common.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned ElementSize = VT.getSizeInBits();
+      unsigned Shift = ElementSize * N2C->getZExtValue();
+      APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+      return getConstant(ShiftedVal.trunc(ElementSize), VT);
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    if (N1.getValueType() == VT) // Trivial extraction.
+      return N1;
+    break;
+  }
+
+  if (N1C) {
+    if (N2C) {
+      SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+      if (SV.getNode()) return SV;
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  // Constant fold FP operations.
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP) {
+    if (!N2CFP && isCommutativeBinOp(Opcode)) {
+      // Canonicalize constant to RHS if commutative
+      std::swap(N1CFP, N2CFP);
+      std::swap(N1, N2);
+    } else if (N2CFP && VT != MVT::ppcf128) {
+      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+      APFloat::opStatus s;
+      switch (Opcode) {
+      case ISD::FADD:
+        s = V1.add(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FSUB:
+        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FMUL:
+        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FDIV:
+        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FREM :
+        s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FCOPYSIGN:
+        V1.copySign(V2);
+        return getConstantFP(V1, VT);
+      default: break;
+      }
+    }
+  }
+
+  // Canonicalize an UNDEF to the RHS, even over a constant.
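+  // (Illustration, added; not in the original: (add undef, C) is rewritten
+  // as (add C, undef) so the RHS-undef folds below apply, while a
+  // non-commutative case such as (sub undef, X) folds directly to undef.)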
+ if (N1.getOpcode() == ISD::UNDEF) { + if (isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SRA: + return N1; // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(undef, arg2) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N2; + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::XOR: + if (N1.getOpcode() == ISD::UNDEF) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return getConstant(0, VT); + // fallthrough + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUB: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + return N2; // fold op(arg1, undef) -> undef + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) + return N2; + break; + case ISD::MUL: + case ISD::AND: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(arg1, undef) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N1; + case ISD::OR: + if (!VT.isVector()) + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Memoize this node if possible. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3) { + // Perform various simplifications. + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + switch (Opcode) { + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. + if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR && + N3.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. 
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); + if (Simp.getNode()) return Simp; + break; + } + case ISD::SELECT: + if (N1C) { + if (N1C->getZExtValue()) + return N2; // select true, X, Y -> X + else + return N3; // select false, X, Y -> Y + } + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::VECTOR_SHUFFLE: + llvm_unreachable("should use getVectorShuffle constructor!"); + break; + case ISD::BIT_CONVERT: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VT, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VT, Ops, 5); +} + +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector<SDValue, 8> ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. + return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. 
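+/// (Worked example, added for illustration: for VT = i32 and the constant
+/// byte value 0xAB, the splat loop below produces the constant 0xABABABAB.)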
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+                              DebugLoc dl) {
+  assert(Value.getOpcode() != ISD::UNDEF);
+
+  unsigned NumBits = VT.getScalarType().getSizeInBits();
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+    unsigned Shift = 8;
+    for (unsigned i = NumBits; i > 8; i >>= 1) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+    }
+    if (VT.isInteger())
+      return DAG.getConstant(Val, VT);
+    return DAG.getConstantFP(APFloat(Val), VT);
+  }
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Value = DAG.getNode(ISD::OR, dl, VT,
+                        DAG.getNode(ISD::SHL, dl, VT, Value,
+                                    DAG.getConstant(Shift,
+                                                    TLI.getShiftAmountTy())),
+                        Value);
+    Shift <<= 1;
+  }
+
+  return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string ptr.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+                                  const TargetLowering &TLI,
+                                  std::string &Str, unsigned Offset) {
+  // Handle vector with all elements zero.
+  if (Str.empty()) {
+    if (VT.isInteger())
+      return DAG.getConstant(0, VT);
+    else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+             VT.getSimpleVT().SimpleTy == MVT::f64)
+      return DAG.getConstantFP(0.0, VT);
+    else if (VT.isVector()) {
+      unsigned NumElts = VT.getVectorNumElements();
+      MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                         DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+                                                             EltVT, NumElts)));
+    } else
+      llvm_unreachable("Expected type!");
+  }
+
+  assert(!VT.isVector() && "Can't handle vector type here!");
+  unsigned NumBits = VT.getSizeInBits();
+  unsigned MSB = NumBits / 8;
+  uint64_t Val = 0;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns base and offset node for the given base
+/// pointer and constant byte offset, i.e. an ISD::ADD of Base and Offset.
+///
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+                                    SelectionDAG &DAG) {
+  EVT VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+                     VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+  unsigned SrcDelta = 0;
+  GlobalAddressSDNode *G = NULL;
+  if (Src.getOpcode() == ISD::GlobalAddress)
+    G = cast<GlobalAddressSDNode>(Src);
+  else if (Src.getOpcode() == ISD::ADD &&
+           Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+           Src.getOperand(1).getOpcode() == ISD::Constant) {
+    G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+    SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+  }
+  if (!G)
+    return false;
+
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+  if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+    return true;
+
+  return false;
+}
+
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Returns true if the number of memory ops
+/// is below the threshold. The types of the sequence of memory ops to
+/// perform the memset / memcpy are returned by reference in MemOps.
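+/// (Illustration, added, assuming a target whose widest legal integer type
+/// is i64: a 15-byte inline copy is typically broken into i64, i32, i16 and
+/// i8 operations by the greedy loop below.)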
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned DstAlign, unsigned SrcAlign,
+                                     bool NonScalarIntSafe,
+                                     bool MemcpyStrSrc,
+                                     SelectionDAG &DAG,
+                                     const TargetLowering &TLI) {
+  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+         "Expecting memcpy / memset source to meet alignment requirement!");
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+  // not need to be loaded.
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                                   NonScalarIntSafe, MemcpyStrSrc,
+                                   DAG.getMachineFunction());
+
+  if (VT == MVT::Other) {
+    if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+        TLI.allowsUnalignedMemoryAccesses(VT)) {
+      VT = TLI.getPointerTy();
+    } else {
+      switch (DstAlign & 7) {
+      case 0:  VT = MVT::i64; break;
+      case 4:  VT = MVT::i32; break;
+      case 2:  VT = MVT::i16; break;
+      default: VT = MVT::i8;  break;
+      }
+    }
+
+    MVT LVT = MVT::i64;
+    while (!TLI.isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+    assert(LVT.isInteger());
+
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  // If we're optimizing for size, and there is a limit, bump the maximum
+  // number of operations inserted down to 4. This is a wild guess that
+  // approximates the size of a call to memcpy or memset (3 arguments + call).
+  if (Limit != ~0U) {
+    const Function *F = DAG.getMachineFunction().getFunction();
+    if (F->hasFnAttr(Attribute::OptimizeForSize))
+      Limit = 4;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+      if (VT.isVector() || VT.isFloatingPoint()) {
+        VT = MVT::i64;
+        while (!TLI.isTypeLegal(VT))
+          VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        VTSize = VT.getSizeInBits() / 8;
+      } else {
+        // This can result in a type that is not legal on the target, e.g.
+        // 1 or 2 bytes on PPC.
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        VTSize >>= 1;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                       SDValue Chain, SDValue Dst,
+                                       SDValue Src, uint64_t Size,
+                                       unsigned Align, bool isVol,
+                                       bool AlwaysInline,
+                                       const Value *DstSV, uint64_t DstSVOff,
+                                       const Value *SrcSV, uint64_t SrcSVOff) {
+  // Turn a memcpy of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
+
+  // Expand memcpy to a series of load and store ops if the size operand falls
+  // below a certain threshold.
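+  // (Illustration, added; not in the original: a 16-byte memcpy with
+  // sufficiently aligned operands typically becomes two i64 load/store
+  // pairs, following the MemOps sequence from FindOptimalMemOpLowering.)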
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + std::string Str; + bool CopyFromStr = isMemSrcFromString(Src, Str); + bool isZeroStr = CopyFromStr && Str.empty(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + (isZeroStr ? 0 : SrcAlign), + true, CopyFromStr, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + uint64_t SrcOff = 0, DstOff = 0; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + if (CopyFromStr && + (isZeroStr || (VT.isInteger() && !VT.isVector()))) { + // It's unlikely a store of a vector immediate can be done in a single + // instruction. It would require a load from a constantpool first. + // We only handle zero vectors here. + // FIXME: Handle other cases where store of vector immediate is done in + // a single instruction. + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, isVol, false, Align); + } else { + // The type might not be legal for the target. This should only happen + // if the type is smaller than a legal type, as on PPC, so the right + // thing to do is generate a LoadExt/StoreTrunc pair. These simplify + // to Load/Store if NVT==VT. + // FIXME does the case above also need this? + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(NVT.bitsGE(VT)); + Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, VT, isVol, false, + MinAlign(SrcAlign, SrcOff)); + Store = DAG.getTruncStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, VT, isVol, false, + Align); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + // Turn a memmove of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memmove to a series of load and store ops if the size operand falls + // below a certain threshold. 
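+  // (Note, added for clarity: unlike the memcpy expansion, every load is
+  // emitted and token-factored together before any store is emitted, so
+  // overlapping source and destination ranges are handled safely.)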
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
+  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+
+  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                SrcAlign, true, false, DAG, TLI))
+    return SDValue();
+
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
+  uint64_t SrcOff = 0, DstOff = 0;
+  SmallVector<SDValue, 8> LoadValues;
+  SmallVector<SDValue, 8> LoadChains;
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i != NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value;
+
+    Value = DAG.getLoad(VT, dl, Chain,
+                        getMemBasePlusOffset(Src, SrcOff, DAG),
+                        SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
+    LoadValues.push_back(Value);
+    LoadChains.push_back(Value.getValue(1));
+    SrcOff += VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                      &LoadChains[0], LoadChains.size());
+  for (unsigned i = 0; i != NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Store;
+
+    Store = DAG.getStore(Chain, dl, LoadValues[i],
+                         getMemBasePlusOffset(Dst, DstOff, DAG),
+                         DstSV, DstSVOff + DstOff, isVol, false, Align);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+                               SDValue Chain, SDValue Dst,
+                               SDValue Src, uint64_t Size,
+                               unsigned Align, bool isVol,
+                               const Value *DstSV, uint64_t DstSVOff) {
+  // Turn a memset of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
+
+  // Expand memset to a series of store ops if the size operand
+  // falls below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  bool NonScalarIntSafe =
+    isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+                                Size, (DstAlignCanChange ? 0 : Align), 0,
+                                NonScalarIntSafe, false, DAG, TLI))
+    return SDValue();
+
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i != NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+    SDValue Store = DAG.getStore(Chain, dl, Value,
+                                 getMemBasePlusOffset(Dst, DstOff, DAG),
+                                 DstSV, DstSVOff + DstOff, isVol, false, 0);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                SDValue Src, SDValue Size,
+                                unsigned Align, bool isVol, bool AlwaysInline,
+                                const Value *DstSV, uint64_t DstSVOff,
+                                const Value *SrcSV, uint64_t SrcSVOff) {
+
+  // Check to see if we should lower the memcpy to loads and stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memcpy with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                             ConstantSize->getZExtValue(),
+                                             Align, isVol, false,
+                                             DstSV, DstSVOff, SrcSV, SrcSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memcpy with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+                                isVol, AlwaysInline,
+                                DstSV, DstSVOff, SrcSV, SrcSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // If we really need inline code and the target declined to provide it,
+  // use a (potentially long) sequence of loads and stores.
+  if (AlwaysInline) {
+    assert(ConstantSize && "AlwaysInline requires a constant size!");
+    return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                   ConstantSize->getZExtValue(), Align, isVol,
+                                   true, DstSV, DstSVOff, SrcSV, SrcSVOff);
+  }
+
+  // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+  // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+  // respect volatile, so they may do things like read or write memory
+  // beyond the given memory regions. But fixing this isn't easy, and most
+  // people don't care.
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+  Entry.Node = Dst; Args.push_back(Entry);
+  Entry.Node = Src; Args.push_back(Entry);
+  Entry.Node = Size; Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+                    false, false, false, false, 0,
+                    TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
+                    /*isReturnValueUsed=*/false,
+                    getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+                                      TLI.getPointerTy()),
+                    Args, *this, dl);
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                 SDValue Src, SDValue Size,
+                                 unsigned Align, bool isVol,
+                                 const Value *DstSV, uint64_t DstSVOff,
+                                 const Value *SrcSV, uint64_t SrcSVOff) {
+
+  // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), + TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, + const Value *DstSV, uint64_t DstSVOff) { + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, isVol, DstSV, DstSVOff); + + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, + DstSV, DstSVOff); + if (Result.getNode()) + return Result; + + // Emit a library call. + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + // Extend or truncate the argument to be an i32 value for the call. 
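+  // (The libc prototype is void *memset(void *s, int c, size_t n), so the
+  // value operand is always passed as a 32-bit int here, whatever the type
+  // of the original SDValue.)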
+ if (Src.getValueType().bitsGT(MVT::i32)) + Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else + Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*getContext()); + Entry.isSExt = true; + Args.push_back(Entry); + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Entry.isSExt = false; + Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMSET), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, const Value* PtrVal, + unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. + Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, MachineMemOperand *MMO) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + EVT VT = Cmp.getValueType(); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Cmp, Swp, MMO); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + const Value* PtrVal, + unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. 
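+  // (The DAG has no explicit memory-ordering model for atomics at this
+  // point, so volatility is used as a conservative stand-in that keeps
+  // later passes from reordering or deleting these operations.)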
+ Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_SWAP) && + "Invalid Atomic Op"); + + EVT VT = Val.getValueType(); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Val}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Val, MMO); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. +/// Allowed to return something different (and simpler) if Simplify is true. +SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector<EVT, 4> VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const EVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, srcValue, SVOff, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(srcValue, Flags, SVOff, + MemVT.getStoreSize(), Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + + // Memoize the node unless it returns a flag. 
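+  // A node whose final result type is MVT::Flag produces "glue" that pins it
+  // to a particular position in the output sequence, so two structurally
+  // identical flag-producing nodes must remain distinct; all other nodes are
+  // uniqued through the CSEMap.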
+  MemIntrinsicSDNode *N;
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+      return SDValue(E, 0);
+    }
+
+    N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+                                               MemVT, MMO);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+                                               MemVT, MMO);
+  }
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                      EVT VT, DebugLoc dl, SDValue Chain,
+                      SDValue Ptr, SDValue Offset,
+                      const Value *SV, int SVOffset, EVT MemVT,
+                      bool isVolatile, bool isNonTemporal,
+                      unsigned Alignment) {
+  if (Alignment == 0) // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(VT);
+
+  // Check if the memory reference references a frame index
+  if (!SV)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOLoad;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(SV, Flags, SVOffset,
+                            MemVT.getStoreSize(), Alignment);
+  return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                      EVT VT, DebugLoc dl, SDValue Chain,
+                      SDValue Ptr, SDValue Offset, EVT MemVT,
+                      MachineMemOperand *MMO) {
+  if (VT == MemVT) {
+    ExtType = ISD::NON_EXTLOAD;
+  } else if (ExtType == ISD::NON_EXTLOAD) {
+    assert(VT == MemVT && "Non-extending load from different memory type!");
+  } else {
+    // Extending load.
+    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+           "Should only be an extending load, not truncating!");
+    assert(VT.isInteger() == MemVT.isInteger() &&
+           "Cannot convert from FP to Int or Int to FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use an ext load to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use an ext load to change the number of vector elements!");
+  }
+
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+         "Unindexed load with an offset!");
+
+  SDVTList VTs = Indexed ?
+    getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<LoadSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+                                             MemVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+                              SDValue Chain, SDValue Ptr,
+                              const Value *SV, int SVOffset,
+                              bool isVolatile, bool isNonTemporal,
+                              unsigned Alignment) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+                 SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
+                                 SDValue Chain, SDValue Ptr,
+                                 const Value *SV,
+                                 int SVOffset, EVT MemVT,
+                                 bool isVolatile, bool isNonTemporal,
+                                 unsigned Alignment) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+                 SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+                             SDValue Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+  return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+                 LD->getChain(), Base, Offset, LD->getSrcValue(),
+                 LD->getSrcValueOffset(), LD->getMemoryVT(),
+                 LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, const Value *SV, int SVOffset,
+                               bool isVolatile, bool isNonTemporal,
+                               unsigned Alignment) {
+  if (Alignment == 0) // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(Val.getValueType());
+
+  // Check if the memory reference references a frame index
+  if (!SV)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(SV, Flags, SVOffset,
+                            Val.getValueType().getStoreSize(), Alignment);
+
+  return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              false, VT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, const Value *SV,
+                                    int SVOffset, EVT SVT,
+                                    bool isVolatile, bool isNonTemporal,
+                                    unsigned Alignment) {
+  if (Alignment == 0) // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(SVT);
+
+  // Check if the memory reference references a frame index
+  if (!SV)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+
+  return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, EVT SVT,
+                                    MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+
+  if (VT == SVT)
+    return getStore(Chain, dl, Val, Ptr, MMO);
+
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
+  assert(VT.isInteger() == SVT.isInteger() &&
+         "Cannot convert from FP to Int or Int to FP!");
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+         "Cannot use trunc store to change the number of vector elements!");
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              true, SVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+                              SDValue Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ST->getMemoryVT().getRawBits());
+  ID.AddInteger(ST->getRawSubclassData());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+                                              ST->isTruncatingStore(),
+                                              ST->getMemoryVT(),
+                                              ST->getMemOperand());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+                               SDValue Chain, SDValue Ptr,
+                               SDValue SV,
+                               unsigned Align) {
+  SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              const SDUse *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  // Copy from an SDUse array into an SDValue array for use with
+  // the regular getNode logic.
+  SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+  return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              const SDValue *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  switch (Opcode) {
+  default: break;
+  case ISD::SELECT_CC: {
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  }
+  case ISD::BR_CC: {
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+
+  if (VT != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const std::vector<EVT> &ResultTys,
+                              const SDValue *Ops, unsigned NumOps) {
+  return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+                 Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const EVT *VTs, unsigned NumVTs,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+  return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2; + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + } + break; + } +#endif + + // Memoize the node unless it returns a flag. + SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(EVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT 
VT2, EVT VT3, EVT VT4) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3 && I->VTs[3] == VT4) + return *I; + + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + SDVTList Result = makeVTList(Array, 4); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { + switch (NumVTs) { + case 0: llvm_unreachable("Cannot have nodes without results!"); + case 1: return getVTList(VTs[0]); + case 2: return getVTList(VTs[0], VTs[1]); + case 3: return getVTList(VTs[0], VTs[1], VTs[2]); + case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); + default: break; + } + + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) { + if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) + continue; + + bool NoMatch = false; + for (unsigned i = 2; i != NumVTs; ++i) + if (VTs[i] != I->VTs[i]) { + NoMatch = true; + break; + } + if (!NoMatch) + return *I; + } + + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs, VTs+NumVTs, Array); + SDVTList Result = makeVTList(Array, NumVTs); + VTList.push_back(Result); + return Result; +} + + +/// UpdateNodeOperands - *Mutate* the specified node in-place to have the +/// specified operands. If the resultant node already exists in the DAG, +/// this does not modify the specified node, instead it returns the node that +/// already exists. If the resultant node does not exist in the DAG, the +/// input node is returned. As a degenerate case, if you specify the same +/// input operands as the node already has, the input node is returned. +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { + assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op == N->getOperand(0)) return N; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + N->OperandList[0].set(Op); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { + assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) + return N; // No operands changed, just return the input node. + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + if (N->OperandList[0] != Op1) + N->OperandList[0].set(Op1); + if (N->OperandList[1] != Op2) + N->OperandList[1].set(Op2); + + // If this gets put into a CSE map, add it. 
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return N; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
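+///
+/// For illustration only (hypothetical target opcode and operands), an
+/// instruction selector might rewrite a node in place like so:
+///   CurDAG->SelectNodeTo(N, TargetOpc::ADDri, N->getValueType(0),
+///                        N->getOperand(0), Imm);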
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, 0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + // Reset the NodeID to -1. + N->setNodeId(-1); + return N; +} + +/// MorphNodeTo - This *mutates* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. 
Note that the DebugLoc need not be the same. +/// +/// Using MorphNodeTo is faster than creating a new node and swapping it in +/// with ReplaceAllUsesWith both because it often avoids allocating a new +/// node, and because it doesn't require CSE recalculation for any of +/// the node's users. +/// +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + // If an identical node already exists, use it. + void *IP = 0; + if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + } + + if (!RemoveNodeFromCSEMaps(N)) + IP = 0; + + // Start the morphing. + N->NodeType = Opc; + N->ValueList = VTs.VTs; + N->NumValues = VTs.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. Keep track of any operands that become dead as a result. + SmallPtrSet<SDNode*, 16> DeadNodeSet; + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Used = Use.getNode(); + Use.set(SDValue()); + if (Used->use_empty()) + DeadNodeSet.insert(Used); + } + + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { + // Initialize the memory references information. + MN->setMemRefs(0, 0); + // If NumOps is larger than the # of operands we can have in a + // MachineSDNode, reallocate the operand list. + if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { + if (MN->OperandsNeedDelete) + delete[] MN->OperandList; + if (NumOps > array_lengthof(MN->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->OperandsNeedDelete = false; + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); + } else { + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. + if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + N->InitOperands(new SDUse[NumOps], Ops, NumOps); + N->OperandsNeedDelete = true; + } else + N->InitOperands(N->OperandList, Ops, NumOps); + } + + // Delete any nodes that are still dead after adding the uses for the + // new operands. + if (!DeadNodeSet.empty()) { + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + } + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. +/// +/// Note that getMachineNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. 
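+///
+/// For illustration only (hypothetical opcode), a target selector could
+/// create a flag-producing two-result node like so:
+///   MachineSDNode *Mul =
+///     CurDAG->getMachineNode(TargetOpc::MULrr, dl, MVT::i32, MVT::Flag,
+///                            LHS, RHS);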
+MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, 0, 0); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, 0, 0); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + 
const std::vector<EVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, + const SDValue *Ops, unsigned NumOps) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + MachineSDNode *N; + void *IP; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); + IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return cast<MachineSDNode>(E); + } + + // Allocate a new MachineSDNode. + N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetOpcode::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetOpcode::INSERT_SUBREG nodes. +SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + +/// getDbgValue - Creates a SDDbgValue node. +/// +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O); +} + +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); +} + +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); +} + +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +/// This class also manages a "downlink" DAGUpdateListener, to forward +/// messages to ReplaceAllUsesWith's callers. 
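+///
+/// Concretely: ReplaceAllUsesWith walks From's use list while mutating it,
+/// and replacing a use can CSE-merge the user into an existing node and
+/// delete it mid-iteration. This listener hears about the deletion and
+/// advances the caller's iterator past the dead node before it can be
+/// dereferenced.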
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+  SelectionDAG::DAGUpdateListener *DownLink;
+  SDNode::use_iterator &UI;
+  SDNode::use_iterator &UE;
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    // Increment the iterator as needed.
+    while (UI != UE && N == *UI)
+      ++UI;
+
+    // Then forward the message.
+    if (DownLink) DownLink->NodeDeleted(N, E);
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Just forward the message.
+    if (DownLink) DownLink->NodeUpdated(N);
+  }
+
+public:
+  RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+                     SDNode::use_iterator &ui,
+                     SDNode::use_iterator &ue)
+    : DownLink(dl), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+                                      DAGUpdateListener *UpdateListener) {
+  SDNode *From = FromN.getNode();
+  assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+         "Cannot replace with this method!");
+  assert(From != To.getNode() && "Cannot replace uses of a value with itself");
+
+  // Iterate over all the existing uses of From. New uses will be added
+  // to the beginning of the use list, which we avoid visiting.
+  // This specifically avoids visiting uses of From that arise while the
+  // replacement is happening, because any such uses would be the result
+  // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its users with To
+  // as well. See PR3018 for more info.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps. If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, &Listener);
+  }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+                                      DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    assert((!From->hasAnyUseOfValue(i) ||
+            From->getValueType(i) == To->getValueType(i)) &&
+           "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+  // Handle the trivial case.
+  if (From == To)
+    return;
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, + const SDValue *To, + DAGUpdateListener *UpdateListener) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, + DAGUpdateListener *UpdateListener){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To, UpdateListener); + return; + } + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. 
+ if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } +} + +/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The same value +/// may appear in both the From and To list. The Deleted vector is +/// handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, + const SDValue *To, + unsigned Num, + DAGUpdateListener *UpdateListener){ + // Handle the simple, trivial case efficiently. + if (Num == 1) + return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + + // Read up all the uses and make records of them. This helps + // processing new uses that are introduced during the + // replacement process. + SmallVector<UseMemo, 4> Uses; + for (unsigned i = 0; i != Num; ++i) { + unsigned FromResNo = From[i].getResNo(); + SDNode *FromNode = From[i].getNode(); + for (SDNode::use_iterator UI = FromNode->use_begin(), + E = FromNode->use_end(); UI != E; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == FromResNo) { + UseMemo Memo = { *UI, i, &Use }; + Uses.push_back(Memo); + } + } + } + + // Sort the uses, so that all the uses from a given User are together. + std::sort(Uses.begin(), Uses.end()); + + for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); + UseIndex != UseIndexEnd; ) { + // We know that this user uses some value of From. If it is the right + // value, update it. + SDNode *User = Uses[UseIndex].User; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // The Uses array is sorted, so all the uses for a given User + // are next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + unsigned i = Uses[UseIndex].Index; + SDUse &Use = *Uses[UseIndex].Use; + ++UseIndex; + + Use.set(To[i]); + } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG +/// based on their topological order. It returns the maximum id and a vector +/// of the SDNodes* in assigned order by reference. +unsigned SelectionDAG::AssignTopologicalOrder() { + + unsigned DAGSize = 0; + + // SortedPos tracks the progress of the algorithm. Nodes before it are + // sorted, nodes after it are unsorted. When the algorithm completes + // it is at the end of the list. 
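+  // What follows is, roughly, Kahn's topological sort run in place on the
+  // intrusive AllNodes list (illustrative summary, not a literal API):
+  //
+  //   pass 1: for each node N, if N has no operands, splice N to SortedPos
+  //           and give it the next id; otherwise stash degree(N) in its id.
+  //   pass 2: walk the sorted prefix; for each user U of the current node,
+  //           decrement U's stashed degree and splice U in once it hits 0.
+  //
+  // The list itself is reused as the output array, so no side storage is
+  // needed beyond the NodeId scratch field.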
+  allnodes_iterator SortedPos = allnodes_begin();
+
+  // Visit all the nodes. Move nodes with no operands to the front of
+  // the list immediately. Annotate nodes that do have operands with their
+  // operand count. Before we do this, the Node Id fields of the nodes
+  // may contain arbitrary values. After, the Node Id fields for nodes
+  // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+  // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+    SDNode *N = I++;
+    checkForCycles(N);
+    unsigned Degree = N->getNumOperands();
+    if (Degree == 0) {
+      // A node with no operands, add it to the result array immediately.
+      N->setNodeId(DAGSize++);
+      allnodes_iterator Q = N;
+      if (Q != SortedPos)
+        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+      assert(SortedPos != AllNodes.end() && "Overran node list");
+      ++SortedPos;
+    } else {
+      // Temporarily use the Node Id as scratch space for the degree count.
+      N->setNodeId(Degree);
+    }
+  }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+    checkForCycles(N);
+    // N is in sorted position, so all its uses have one less operand
+    // that needs to be sorted.
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDNode *P = *UI;
+      unsigned Degree = P->getNodeId();
+      assert(Degree != 0 && "Invalid node degree");
+      --Degree;
+      if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+        P->setNodeId(DAGSize++);
+        if (P != SortedPos)
+          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+        assert(SortedPos != AllNodes.end() && "Overran node list");
+        ++SortedPos;
+      } else {
+        // Update P's outstanding operand count.
+        P->setNodeId(Degree);
+      }
+    }
+    if (I == SortedPos) {
+#ifndef NDEBUG
+      SDNode *S = ++I;
+      dbgs() << "Overran sorted position:\n";
+      S->dumprFull();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+
+  assert(SortedPos == AllNodes.end() &&
+         "Topological sort incomplete!");
+  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+         "First node in topological sort is not the entry token!");
+  assert(AllNodes.front().getNodeId() == 0 &&
+         "First node in topological sort has non-zero id!");
+  assert(AllNodes.front().getNumOperands() == 0 &&
+         "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+  assert(DAGSize == allnodes_size() && "Node count mismatch!");
+  return DAGSize;
+}
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+  assert(SD && "Trying to assign an order to a null node!");
+  Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+  assert(SD && "Trying to get the order of a null node!");
+  return Ordering->getOrder(SD);
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
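+/// (Typical use, sketched with hypothetical names: after visiting a
+/// llvm.dbg.value intrinsic the builder creates an SDDbgValue DV for the
+/// variable and calls AddDbgValue(DV, N, false), so N is flagged as having
+/// debug info and DV is emitted next to it by the scheduler.)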
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { + DbgInfo->add(DB, SD, isParameter); + if (SD) + SD->setHasDebugValue(true); +} + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +HandleSDNode::~HandleSDNode() { + DropOperands(); +} + +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, + const GlobalValue *GA, + EVT VT, int64_t o, unsigned char TF) + : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + TheGlobal = GA; +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(isNonTemporal() == MMO->isNonTemporal() && + "Non-temporal encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, + const SDValue *Ops, unsigned NumOps, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs, Ops, NumOps), + MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) const { + AddNodeIDNode(ID, this); +} + +namespace { + struct EVTArray { + std::vector<EVT> VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +static ManagedStatic<EVTArray> SimpleVTArray; +static ManagedStatic<sys::SmartMutex<true> > VTMutex; + +/// getValueTypeList - Return a pointer to the specified value type. +/// +const EVT *SDNode::getValueTypeList(EVT VT) { + if (VT.isExtended()) { + sys::SmartScopedLock<true> Lock(*VTMutex); + return &(*EVTs->insert(VT).first); + } else { + assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + "Value type out of range!"); + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; + } +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // TODO: Only iterate over uses of a given value of the node + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + if (UI.getUse().getResNo() == Value) { + if (NUses == 0) + return false; + --NUses; + } + } + + // Found exactly the right number of uses? + return NUses == 0; +} + + +/// hasAnyUseOfValue - Return true if there are any use of the indicated +/// value. This method ignores uses of other values defined by this operation. 
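+/// (For example, hasAnyUseOfValue(1) on a load node asks whether the load's
+/// chain result is used, independently of any uses of the loaded value in
+/// result 0.)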
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+    if (UI.getUse().getResNo() == Value)
+      return true;
+
+  return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// isOperandOf - Return true if this node is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (*this == N->getOperand(i))
+      return true;
+  return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+    if (this == N->OperandList[i].getNode())
+      return true;
+  return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions. In practice, this looks through token
+/// factors and non-volatile loads. In order to remain efficient, this only
+/// looks a few nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+                                             unsigned Depth) const {
+  if (*this == Dest) return true;
+
+  // Don't search too deeply, we just want to be able to see through
+  // TokenFactor's etc.
+  if (Depth == 0) return false;
+
+  // If this is a token factor, all inputs to the TF happen in parallel. If any
+  // of the operands of the TF reach dest, then we can do the xform.
+  if (getOpcode() == ISD::TokenFactor) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+        return true;
+    return false;
+  }
+
+  // Loads don't have side effects, look through them.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+    if (!Ld->isVolatile())
+      return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+  }
+  return false;
+}
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This
+/// node is either an operand of N or it can be reached by traversing up the
+/// operands.
+/// NOTE: this is an expensive method. Use it carefully.
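+/// (The implementation below is an iterative depth-first walk up the operand
+/// edges with a Visited set, so in the worst case it touches every node
+/// reachable from N; hence the warning.)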
+bool SDNode::isPredecessorOf(SDNode *N) const { + SmallPtrSet<SDNode *, 32> Visited; + SmallVector<SDNode *, 16> Worklist; + Worklist.push_back(N); + + do { + N = Worklist.pop_back_val(); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *Op = N->getOperand(i).getNode(); + if (Op == this) + return true; + if (Visited.insert(Op)) + Worklist.push_back(Op); + } + } while (!Worklist.empty()); + + return false; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); +} + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->get(getMachineOpcode()).getName(); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: + return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case 
ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::VSETCC: return "vsetcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return 
"carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. + case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BIT_CONVERT: return "bit_convert"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Trampolines + case ISD::TRAMPOLINE: return "trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case 
ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: + return ""; + case ISD::PRE_INC: + return "<pre-inc>"; + case ISD::PRE_DEC: + return "<pre-dec>"; + case ISD::POST_INC: + return "<post-inc>"; + case ISD::POST_DEC: + return "<post-dec>"; + } +} + +std::string ISD::ArgFlagsTy::getArgFlagsString() { + std::string S = "< "; + + if (isZExt()) + S += "zext "; + if (isSExt()) + S += "sext "; + if (isInReg()) + S += "inreg "; + if (isSRet()) + S += "sret "; + if (isByVal()) + S += "byval "; + if (isNest()) + S += "nest "; + if (getByValAlign()) + S += "byval-align:" + utostr(getByValAlign()) + " "; + if (getOrigAlign()) + S += "orig-align:" + utostr(getOrigAlign()) + " "; + if (getByValSize()) + S += "byval-size:" + utostr(getByValSize()) + " "; + return S + ">"; +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << 
" + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + if (G && R->getReg() && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + OS << " %reg" << R->getReg(); + } + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. 
+ if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) +{ + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 100); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. + dumprWithDepth(G, 100); +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + + dbgs() << "\n"; + dbgs().indent(indent); + N->dump(G); +} + +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + unsigned i; + for (i= 0; i != NE; ++i) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + getConstant(i, MVT::i32)); + } else { + // A scalar operand; just use it as is. 
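+        // (Scalar operands such as condition codes, value types, or shift
+        // amounts are shared unchanged by every unrolled element.)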
+ Operands[j] = Operand; + } + } + + switch (N->getOpcode()) { + default: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], + getShiftAmountOperand(Operands[1]))); + break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: { + EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } + } + } + + for (; i < ResNE; ++i) + Scalars.push_back(getUNDEF(EltVT)); + + return getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*getContext(), EltVT, ResNE), + &Scalars[0], Scalars.size()); +} + + +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const { + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); + if (V && (V->getSExtValue() == Dist*Bytes)) + return true; + } + + const GlobalValue *GV1 = NULL; + const GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + const GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + // If GV has specified alignment, then use it. Otherwise, use the preferred + // alignment. + unsigned Align = GV->getAlignment(); + if (!Align) { + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (GVar->hasInitializer()) { + const TargetData *TD = TLI.getTargetData(); + Align = TD->getPreferredAlignment(GVar); + } + } + } + return MinAlign(Align, GVOffset); + } + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. 
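+  // (1 << 31, i.e. INT_MIN, is a sentinel meaning "no frame index found";
+  // the code below only consults MFI if FrameIdx was overwritten.)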
+ int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return FIInfoAlign; + } + + return 0; +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS << std::string(indent, ' '); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +const Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits, + bool isBigEndian) { + EVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. 
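+  // (Worked example: a <4 x i8> build_vector of four 0xAB constants gives
+  // sz == 32 and SplatValue == 0xABABABAB; the halving loop below matches
+  // 0xABAB against 0xABAB and then 0xAB against 0xAB, so the node is
+  // reported as a splat with SplatBitSize == 8 and SplatValue == 0xAB.)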
+ unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + + for (unsigned j = 0; j < nOps; ++j) { + unsigned i = isBigEndian ? nOps-1-j : j; + SDValue OpVal = getOperand(i); + unsigned BitPos = j * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); + else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + zextOrTrunc(sz) << BitPos; + else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + + HasAnyUndefs = (SplatUndef != 0); + while (sz > 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); + APInt LowValue = APInt(SplatValue).trunc(HalfSize); + APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. + if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} + +#ifdef XDEBUG +static void checkForCyclesHelper(const SDNode *N, + SmallPtrSet<const SDNode*, 32> &Visited, + SmallPtrSet<const SDNode*, 32> &Checked) { + // If this node has already been checked, don't check it again. + if (Checked.count(N)) + return; + + // If a node has already been visited on this depth-first walk, reject it as + // a cycle. 
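+  // (Visited holds the nodes on the current walk, Checked the nodes whose
+  // entire operand subgraph has already been cleared; meeting a node that
+  // is still in Visited means we followed a back edge, i.e. a cycle.)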
+ if (!Visited.insert(N)) { + dbgs() << "Offending node:\n"; + N->dumprFull(); + errs() << "Detected cycle in SelectionDAG\n"; + abort(); + } + + for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + + Checked.insert(N); + Visited.erase(N); +} +#endif + +void llvm::checkForCycles(const llvm::SDNode *N) { +#ifdef XDEBUG + assert(N && "Checking nonexistant SDNode"); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked); +#endif +} + +void llvm::checkForCycles(const llvm::SelectionDAG *DAG) { + checkForCycles(DAG->getRoot().getNode()); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp new file mode 100644 index 0000000..e657445 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -0,0 +1,6253 @@ +//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "SDNodeDbgValue.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). 
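+/// (Note the two-argument form cl::opt<unsigned, true> below: together with
+/// cl::location it stores the parsed value directly into this external
+/// variable instead of inside the option object itself.)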
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+                 cl::desc("Generate low-precision inline sequences "
+                          "for some float libcalls"),
+                 cl::location(LimitFloatPrecision),
+                 cl::init(0));
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      EVT PartVT, EVT ValueVT);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
+                                const SDValue *Parts,
+                                unsigned NumParts, EVT PartVT, EVT ValueVT,
+                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+  if (ValueVT.isVector())
+    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
+  assert(NumParts > 0 && "No parts to assemble!");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Val = Parts[0];
+
+  if (NumParts > 1) {
+    // Assemble the value from multiple parts.
+    if (ValueVT.isInteger()) {
+      unsigned PartBits = PartVT.getSizeInBits();
+      unsigned ValueBits = ValueVT.getSizeInBits();
+
+      // Assemble the power of 2 part.
+      unsigned RoundParts = NumParts & (NumParts - 1) ?
+        1 << Log2_32(NumParts) : NumParts;
+      unsigned RoundBits = PartBits * RoundParts;
+      EVT RoundVT = RoundBits == ValueBits ?
+        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+      SDValue Lo, Hi;
+
+      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+      if (RoundParts > 2) {
+        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+                              PartVT, HalfVT);
+        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+                              RoundParts / 2, PartVT, HalfVT);
+      } else {
+        Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
+        Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
+      }
+
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+
+      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+      if (RoundParts < NumParts) {
+        // Assemble the trailing non-power-of-2 part.
+        unsigned OddParts = NumParts - RoundParts;
+        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+        Hi = getCopyFromParts(DAG, DL,
+                              Parts + RoundParts, OddParts, PartVT, OddVT);
+
+        // Combine the round and odd parts.
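+        // (In effect this computes, in the NumParts * PartBits wide type,
+        //    Val = (anyext(Hi) << RoundBits) | zext(Lo)
+        // with Lo and Hi swapped first on big-endian targets.)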
+        Lo = Val;
+        if (TLI.isBigEndian())
+          std::swap(Lo, Hi);
+        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
+                                         TLI.getPointerTy()));
+        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+      }
+    } else if (PartVT.isFloatingPoint()) {
+      // FP split into multiple FP parts (for ppcf128)
+      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+             "Unexpected split");
+      SDValue Lo, Hi;
+      Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+    } else {
+      // FP split into integer parts (soft fp)
+      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+             !PartVT.isVector() && "Unexpected split");
+      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+    }
+  }
+
+  // There is now one part, held in Val. Correct it to match ValueVT.
+  PartVT = Val.getValueType();
+
+  if (PartVT == ValueVT)
+    return Val;
+
+  if (PartVT.isInteger() && ValueVT.isInteger()) {
+    if (ValueVT.bitsLT(PartVT)) {
+      // For a truncate, see if we have any information to
+      // indicate whether the truncated bits will always be
+      // zero or sign-extension.
+      if (AssertOp != ISD::DELETED_NODE)
+        Val = DAG.getNode(AssertOp, DL, PartVT, Val,
+                          DAG.getValueType(ValueVT));
+      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+    }
+    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+  }
+
+  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+    // FP_ROUND's are always exact here.
+    if (ValueVT.bitsLT(Val.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
+                         DAG.getIntPtrConstant(1));
+
+    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+  }
+
+  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+
+  llvm_unreachable("Unknown mismatch!");
+  return SDValue();
+}
+
+/// getCopyFromPartsVector - Create a value of vector type ValueVT that
+/// contains the specified legal parts combined into the value they
+/// represent.
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      EVT PartVT, EVT ValueVT) {
+  assert(ValueVT.isVector() && "Not a vector value");
+  assert(NumParts > 0 && "No parts to assemble!");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Val = Parts[0];
+
+  // Handle a multi-element vector.
+  if (NumParts > 1) {
+    EVT IntermediateVT, RegisterVT;
+    unsigned NumIntermediates;
+    unsigned NumRegs =
+      TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                 NumIntermediates, RegisterVT);
+    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+    NumParts = NumRegs; // Silence a compiler warning.
+    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+    assert(RegisterVT == Parts[0].getValueType() &&
+           "Part type doesn't match part!");
+
+    // Assemble the parts into intermediate operands.
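+    // (Example, assuming a hypothetical target whose widest legal vector
+    // type is v4i32: a v8i32 value arrives as NumParts == 2 v4i32 parts,
+    // the breakdown reports NumIntermediates == 2 with IntermediateVT ==
+    // v4i32, and the CONCAT_VECTORS below reassembles the full v8i32.)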
+ SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? + ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, + ValueVT, &Ops[0], NumIntermediates); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. + return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + } + + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + + + + +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT); + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (NumParts == 0) + return; + + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. 
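+    // (For instance, an i8 value copied into a single i32 part takes the
+    // integer branch below and is widened according to ExtendKind, which
+    // callers pick based on signedness: sext, zext, or anyext when the
+    // high bits do not matter.)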
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} + + +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (NumParts == 1) { + if (PartVT == ValueVT) { + // Nothing to do. 
+  } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+    // Bitconvert vector->vector case.
+    Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+  } else if (PartVT.isVector() &&
+             PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+             PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+    EVT ElementVT = PartVT.getVectorElementType();
+    // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
+    // undef elements.
+    SmallVector<SDValue, 16> Ops;
+    for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+    for (unsigned i = ValueVT.getVectorNumElements(),
+         e = PartVT.getVectorNumElements(); i != e; ++i)
+      Ops.push_back(DAG.getUNDEF(ElementVT));
+
+    Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+    // FIXME: Use CONCAT for 2x -> 4x.
+
+    //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+    //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+  } else {
+    // Vector -> scalar conversion.
+    assert(ValueVT.getVectorElementType() == PartVT &&
+           ValueVT.getVectorNumElements() == 1 &&
+           "Only trivial vector-to-scalar conversions should get here!");
+    Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                      PartVT, Val, DAG.getIntPtrConstant(0));
+  }
+
+  Parts[0] = Val;
+  return;
+  }
+
+  // Handle a multi-element vector.
+  EVT IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+                                                IntermediateVT,
+                                                NumIntermediates, RegisterVT);
+  unsigned NumElements = ValueVT.getVectorNumElements();
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  NumParts = NumRegs; // Silence a compiler warning.
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  // Split the vector into intermediate operands.
+  SmallVector<SDValue, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i) {
+    if (IntermediateVT.isVector())
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+                           IntermediateVT, Val,
+                   DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
+    else
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                           IntermediateVT, Val, DAG.getIntPtrConstant(i));
+  }
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split the value into
+    // legal parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+  }
+}
+
+namespace {
+  /// RegsForValue - This struct represents the registers (physical or virtual)
+  /// that a particular set of values is assigned, and the type information
+  /// about the value. The most common situation is to represent one value at a
+  /// time, but struct or array values are handled element-wise as multiple
+  /// values. The splitting of aggregates is performed recursively, so that we
+  /// never have aggregate-typed registers. The values at this point do not
+  /// necessarily have legal types, so each value may require one or more
+  /// registers of some legal type.
+  ///
+  struct RegsForValue {
+    /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+    ///
+    SmallVector<EVT, 4> ValueVTs;
+
+    /// RegVTs - The value types of the registers. This is the same size as
+    /// ValueVTs and it records, for each value, what the type of the assigned
+    /// register or registers are. (Individual values are never synthesized
+    /// from more than one type of register.)
+    ///
+    /// With virtual registers, the contents of RegVTs are redundant with TLI's
+    /// getRegisterType member function; with physical registers, however, it
+    /// is necessary to have a separate record of the types.
+    ///
+    SmallVector<EVT, 4> RegVTs;
+
+    /// Regs - This list holds the registers assigned to the values.
+    /// Each legal or promoted value requires one register, and each
+    /// expanded value requires multiple registers.
+    ///
+    SmallVector<unsigned, 4> Regs;
+
+    RegsForValue() {}
+
+    RegsForValue(const SmallVector<unsigned, 4> &regs,
+                 EVT regvt, EVT valuevt)
+      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+                 unsigned Reg, const Type *Ty) {
+      ComputeValueVTs(tli, Ty, ValueVTs);
+
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT ValueVT = ValueVTs[Value];
+        unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+        EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+        for (unsigned i = 0; i != NumRegs; ++i)
+          Regs.push_back(Reg + i);
+        RegVTs.push_back(RegisterVT);
+        Reg += NumRegs;
+      }
+    }
+
+    /// areValueTypesLegal - Return true if the types of all the values are
+    /// legal.
+    bool areValueTypesLegal(const TargetLowering &TLI) {
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT RegisterVT = RegVTs[Value];
+        if (!TLI.isTypeLegal(RegisterVT))
+          return false;
+      }
+      return true;
+    }
+
+    /// append - Add the specified values to this one.
+    void append(const RegsForValue &RHS) {
+      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    }
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
+    /// this value and return the result as a ValueVTs value. This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+                            DebugLoc dl,
+                            SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copy the
+    /// specified value into the registers specified by this object. This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                       SDValue &Chain, SDValue *Flag) const;
+
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list. This adds the code marker, matching input operand index
+    /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; + }; +} + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + // A Value with type {} or [0 x %t] needs no registers. + if (ValueVTs.empty()) + return SDValue(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. 
This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                                 SDValue &Chain, SDValue *Flag) const {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Get the list of the value's legal parts.
+  unsigned NumRegs = Regs.size();
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+    EVT RegisterVT = RegVTs[Value];
+
+    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+                   &Parts[Part], NumParts, RegisterVT);
+    Part += NumParts;
+  }
+
+  // Copy the parts into the registers.
+  SmallVector<SDValue, 8> Chains(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    SDValue Part;
+    if (Flag == 0) {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+    } else {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+      *Flag = Part.getValue(1);
+    }
+
+    Chains[i] = Part.getValue(0);
+  }
+
+  if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+    // a single scheduling unit. If we create a TokenFactor and return it as
+    // chain, then the TokenFactor is both a predecessor (operand) of the
+    // user as well as a successor (the TF operands are flagged to the user).
+    //   c1, f1 = CopyToReg
+    //   c2, f2 = CopyToReg
+    //   c3     = TokenFactor c1, c2
+    //   ...
+    //          = op c3, ..., f2
+    Chain = Chains[NumRegs-1];
+  else
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+                                        unsigned MatchingIdx,
+                                        SelectionDAG &DAG,
+                                        std::vector<SDValue> &Ops) const {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+  if (HasMatching)
+    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+  SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+  Ops.push_back(Res);
+
+  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+    EVT RegisterVT = RegVTs[Value];
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      assert(Reg < Regs.size() && "Mismatch in # registers expected");
+      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+    }
+  }
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+  AA = &aa;
+  GFI = gfi;
+  TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() { + NodeMap.clear(); + UnusedArgNodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + DanglingDebugInfoMap.clear(); + CurDebugLoc = DebugLoc(); + HasTailCall = false; +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGBuilder::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGBuilder::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. + if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { + if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. + DAG.AssignOrdering(Node, SDNodeOrder); + + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) + AssignOrderingToNode(Node->getOperand(I).getNode()); +} + +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (isa<TerminatorInst>(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); + + CurDebugLoc = I.getDebugLoc(); + + visit(I.getOpcode(), I); + + if (!isa<TerminatorInst>(&I) && !HasTailCall) + CopyToExportRegsIfNeeded(&I); + + CurDebugLoc = DebugLoc(); +} + +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: llvm_unreachable("Unknown instruction type encountered!"); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; +#include "llvm/Instruction.def" + } + + // Assign the ordering to the freshly created DAG nodes. + if (NodeMap.count(&I)) { + ++SDNodeOrder; + AssignOrderingToNode(getValue(&I).getNode()); + } +} + +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. 
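+// For example, a dbg.value intrinsic may be visited before the instruction
+// that defines V. The debug info is parked in DanglingDebugInfoMap and is
+// only turned into an SDDbgValue here, once an SDValue for V exists.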
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + MDNode *Variable = DI->getVariable(); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + SDV = DAG.getDbgValue(Variable, Val.getNode(), + Val.getResNo(), Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else { + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + +// getValue - Return an SDValue for the given Value. +SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // If there's a virtual register allocated and initialized for this + // value, use it. + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getValueImpl - Helper function for getValue and getNonRegisterValue. +/// Create an SDValue for the given value. +SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + if (const Constant *C = dyn_cast<Constant>(V)) { + EVT VT = TLI.getValueType(V->getType(), true); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return DAG.getConstant(*CI, VT); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); + + if (isa<ConstantPointerNull>(C)) + return DAG.getConstant(0, TLI.getPointerTy()); + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return DAG.getConstantFP(*CFP, VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return DAG.getUNDEF(VT); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the NodeMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. 
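+        // (getValue returns a null SDValue for an operand of type {} or
+        // [0 x %t], so there is nothing to flatten for it.)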
+ if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); + } + + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + EVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } + + return DAG.getMergeValues(&Constants[0], NumElts, + getCurDebugLoc()); + } + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return DAG.getBlockAddress(BA, VT); + + const VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. + SmallVector<SDValue, 16> Ops; + if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + } + + llvm_unreachable("Can't get register for value!"); + return SDValue(); +} + +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + SDValue Chain = getControlRoot(); + SmallVector<ISD::OutputArg, 8> Outs; + SmallVector<SDValue, 8> OutVals; + + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. 
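+    // This is the sret-demotion path: DemoteRegister holds a hidden pointer
+    // to the caller-allocated return slot, and each element of the aggregate
+    // is stored through it at its computed offset below.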
+ SmallVector<EVT, 1> PtrValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); + + SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + + SmallVector<SDValue, 4> Chains(NumValues); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i])); + Chains[i] = + DAG.getStore(Chain, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + Add, NULL, Offsets[i], false, false, 0); + } + + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + } else if (I.getNumOperands() != 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted + // to at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true)); + OutVals.push_back(Parts[i]); + } + } + } + } + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurDebugLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. 
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (const Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == SwitchBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + } else { + Condition = ISD::SETEQ; // silence warning. + llvm_unreachable("Unknown compare instruction"); + } + + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. 
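+  // The leaf is not a foldable comparison, so emit it directly as
+  // "branch to TBB if Cond is true, otherwise to FBB".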
+  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+               NULL, TBB, FBB, CurBB);
+  SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively decompose it, emitting one conditional branch per leaf of the
+/// and/or tree and threading the true/false destinations through newly
+/// created intermediate blocks.
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+                                               MachineBasicBlock *TBB,
+                                               MachineBasicBlock *FBB,
+                                               MachineBasicBlock *CurBB,
+                                               MachineBasicBlock *SwitchBB,
+                                               unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  const Instruction *BOp = dyn_cast<Instruction>(Cond);
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+    return;
+  }
+
+  // Create TmpBB after CurBB.
+  MachineFunction::iterator BBI = CurBB;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    // This requires creation of TmpBB after CurBB.
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+  }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
+  if (Cases.size() != 2) return true;
+
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+
+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].CC == Cases[1].CC &&
+      isa<Constant>(Cases[0].CmpRHS) &&
+      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }
+
+  return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+  MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+  // Update machine-CFG edges.
+  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = BrMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  if (I.isUnconditional()) {
+    // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + const Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0], BrMBB); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), + NULL, Succ0MBB, Succ1MBB, BrMBB); + + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB, BrMBB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); + + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. 
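+    // (The "(X == false)" case below is lowered as an XOR with 1, i.e. a
+    // logical not of X.)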
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + EVT VT = CmpOp.getValueType(); + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, VT), ISD::SETULE); + } + } + + // Update successor info + SwitchBB->addSuccessor(CB.TrueBB); + SwitchBB->addSuccessor(CB.FalseBB); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // Insert the false branch. + if (CB.FalseBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); + + DAG.setRoot(BrCond); +} + +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + EVT PTy = TLI.getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + MVT::Other, Index.getValue(1), + Table, Index); + DAG.setRoot(BrJumpTable); +} + +/// visitJumpTableHeader - This function emits necessary code to produce index +/// in the JumpTable from switch case. +void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB) { + // Subtract the lowest switch case value from the value being switched on and + // conditional branch to default mbb if the result is greater than the + // difference between smallest and largest cases. + SDValue SwitchOp = getValue(JTH.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(JTH.First, VT)); + + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. 
+ // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncating. + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); + + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(Sub.getValueType()), Sub, + DAG.getConstant(JTH.Last-JTH.First,VT), + ISD::SETUGT); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + if (JT.MBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB)); + + DAG.setRoot(BrCond); +} + +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(B.First, VT)); + + // Check range + SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(Sub.getValueType()), + Sub, DAG.getConstant(B.Range, VT), + ISD::SETUGT); + + SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), + TLI.getPointerTy()); + + B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + B.Reg, ShiftOp); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + SwitchBB->addSuccessor(B.Default); + SwitchBB->addSuccessor(MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB != NextBlock) + BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB)); + + DAG.setRoot(BrRange); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB) { + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, + TLI.getPointerTy()); + SDValue Cmp; + if (CountPopulation_64(B.Mask) == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. 
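+    // For example, with Mask == 0x20 this emits
+    //   setcc ShiftOp, 5, eq
+    // since only a shift amount of 5 would select that bit.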
+    Cmp = DAG.getSetCC(getCurDebugLoc(),
+                       TLI.getSetCCResultType(ShiftOp.getValueType()),
+                       ShiftOp,
+                       DAG.getConstant(CountTrailingZeros_64(B.Mask),
+                                       TLI.getPointerTy()),
+                       ISD::SETEQ);
+  } else {
+    // Make the desired shift.
+    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                                    TLI.getPointerTy(),
+                                    DAG.getConstant(1, TLI.getPointerTy()),
+                                    ShiftOp);
+
+    // Emit bit tests and jumps.
+    SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                                TLI.getPointerTy(), SwitchVal,
+                                DAG.getConstant(B.Mask, TLI.getPointerTy()));
+    Cmp = DAG.getSetCC(getCurDebugLoc(),
+                       TLI.getSetCCResultType(AndOp.getValueType()),
+                       AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+                       ISD::SETNE);
+  }
+
+  SwitchBB->addSuccessor(B.TargetBB);
+  SwitchBB->addSuccessor(NextMBB);
+
+  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              Cmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = SwitchBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  if (NextMBB != NextBlock)
+    BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+                        DAG.getBasicBlock(NextMBB));
+
+  DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  const Value *Callee(I.getCalledValue());
+  if (isa<InlineAsm>(Callee))
+    visitInlineAsm(&I);
+  else
+    LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  CopyToExportRegsIfNeeded(&I);
+
+  // Update successor info.
+  InvokeMBB->addSuccessor(Return);
+  InvokeMBB->addSuccessor(LandingPad);
+
+  // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+                                                 CaseRecVector& WorkList,
+                                                 const Value* SV,
+                                                 MachineBasicBlock *Default,
+                                                 MachineBasicBlock *SwitchBB) {
+  Case& BackCase = *(CR.Range.second-1);
+
+  // Size is the number of Cases represented by this range.
+  size_t Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once. For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+  // Rearrange the case blocks so that the last one falls through if possible.
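+  // (If some case in the range already targets NextBlock, swapping it into
+  // the last position lets the final comparison fall through instead of
+  // ending with an extra unconditional branch.)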
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order. See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->insert(BBI, FallThrough);
+
+      // Put SV in a virtual register to make it available from the new blocks.
+      ExportFromCurrentBlock(SV);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    const Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block. Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == SwitchBB)
+      visitSwitchCase(CB, SwitchBB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return !DisableJumpTables &&
+          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+  APInt LastExt(Last), FirstExt(First);
+  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+  return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+                                             CaseRecVector& WorkList,
+                                             const Value* SV,
+                                             MachineBasicBlock* Default,
+                                             MachineBasicBlock *SwitchBB) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize.ult(4))
+    return false;
+
+  APInt Range = ComputeRange(First, Last);
+  double Density = TSize.roundToDouble() / Range.roundToDouble();
+  if (Density < 0.4)
+    return false;
+
+  DEBUG(dbgs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range << ". Size: " << TSize
+               << ". Density: " << Density << "\n\n");
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Create a new basic block to hold the code for loading the address + // of the jump table, and jumping to it. Update successor information; + // we will either branch to the default case for the switch, or the jump + // table. + MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, JumpTableBB); + CR.CaseBB->addSuccessor(Default); + CR.CaseBB->addSuccessor(JumpTableBB); + + // Build a vector of destination BBs, corresponding to each target + // of the jump table. If the value of the jump table slot corresponds to + // a case statement, push the case's BB onto the vector, otherwise, push + // the default BB. + std::vector<MachineBasicBlock*> DestBBs; + APInt TEI = First; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { + const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt &High = cast<ConstantInt>(I->High)->getValue(); + + if (Low.sle(TEI) && TEI.sle(High)) { + DestBBs.push_back(I->BB); + if (TEI==High) + ++I; + } else { + DestBBs.push_back(Default); + } + } + + // Update successor info. Add one edge to each unique successor. + BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); + for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + JumpTableBB->addSuccessor(*I); + } + } + + // Create a jump table index for this jump table. + unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) + ->createJumpTableIndex(DestBBs); + + // Set the jump table information so that we can codegen it as a second + // MachineBasicBlock + JumpTable JT(-1U, JTI, JumpTableBB, Default); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); + if (CR.CaseBB == SwitchBB) + visitJumpTableHeader(JT, JTH, SwitchBB); + + JTCases.push_back(JumpTableBlock(JTH, JT)); + + return true; +} + +/// handleBTSplitSwitchCase - emit comparison and split binary search tree into +/// 2 subtrees. +bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = FuncInfo.MF; + + // Figure out which block is immediately after the current one. + MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Size is the number of Cases represented by this range. + unsigned Size = CR.Range.second - CR.Range.first; + + const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); + double FMetric = 0; + CaseItr Pivot = CR.Range.first + Size/2; + + // Select optimal pivot, maximizing sum density of LHS and RHS. This will + // (heuristically) allow us to emit JumpTable's later. 
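+  // Illustrative example: for case values {0, 1, 2, 3, 100}, splitting
+  // between 3 and 100 gives LDensity = 4/4 and RDensity = 1/1 with
+  // Range = 98, so Metric = log2(98) * (1.0 + 1.0); the dense left half
+  // can then be lowered as a jump table.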
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  APInt LSize = FrontCase.size();
+  APInt RSize = TSize-LSize;
+  DEBUG(dbgs() << "Selecting best pivot: \n"
+               << "First: " << First << ", Last: " << Last <<'\n'
+               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+    APInt Range = ComputeRange(LEnd, RBegin);
+    assert((Range - 2ULL).isNonNegative() &&
+           "Invalid case distance");
+    double LDensity = (double)LSize.roundToDouble() /
+                      (LEnd - First + 1ULL).roundToDouble();
+    double RDensity = (double)RSize.roundToDouble() /
+                      (Last - RBegin + 1ULL).roundToDouble();
+    double Metric = Range.logBase2()*(LDensity+RDensity);
+    // Should always split in some non-trivial place.
+    DEBUG(dbgs() <<"=>Step\n"
+                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+                 << "LDensity: " << LDensity
+                 << ", RDensity: " << RDensity << '\n'
+                 << "Metric: " << Metric << '\n');
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C. We use this to optimize our binary
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getValue() ==
+      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to the RHS node.
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == SwitchBB)
+    visitSwitchCase(CB, SwitchBB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - if the current case range has few destinations
+/// and spans less than the machine word bitwidth, encode the case range into
+/// a series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   const Value* SV,
+                                                   MachineBasicBlock* Default,
+                                                   MachineBasicBlock *SwitchBB) {
+  EVT PTy = TLI.getPointerTy();
+  unsigned IntPtrBits = PTy.getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // If the target does not have a legal shift left, do not emit bit tests
+  // at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one comparison; a case range counts as two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations.
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DEBUG(dbgs() << "Total number of unique destinations: "
+        << Dests.size() << '\n'
+        << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(IntPtrBits) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(dbgs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a word without having
+  // to subtract minValue; in that case, we can optimize away the subtraction.
+  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |= 1ULL << j;
+      CasesBits[i].Bits++;
+    }
+
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + DEBUG(dbgs() << "Cases:\n"); + for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { + DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask + << ", Bits: " << CasesBits[i].Bits + << ", BB: " << CasesBits[i].BB << '\n'); + + MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, CaseBB); + BTC.push_back(BitTestCase(CasesBits[i].Mask, + CaseBB, + CasesBits[i].BB)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + BitTestBlock BTB(lowBound, cmpRange, SV, + -1U, (CR.CaseBB == SwitchBB), + CR.CaseBB, Default, BTC); + + if (CR.CaseBB == SwitchBB) + visitBitTestHeader(BTB, SwitchBB); + + BitTestCases.push_back(BTB); + + return true; +} + +/// Clusterify - Transform simple list of Cases into list of CaseRange's +size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { + size_t numCmps = 0; + + // Start with "simple" cases + for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { + MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; + Cases.push_back(Case(SI.getSuccessorValue(i), + SI.getSuccessorValue(i), + SMBB)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); + const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // If there is only the default destination, branch to it if it is not the + // next basic block. Otherwise, just fall through. + if (SI.getNumOperands() == 2) { + // Update machine-CFG edges. + + // If this is not a fall-through branch, emit the branch. + SwitchMBB->addSuccessor(Default); + if (Default != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Default))); + + return; + } + + // If there are any non-default case statements, create a vector of Cases + // representing each one, and sort the vector so that we can efficiently + // create a binary search tree from them. + CaseVector Cases; + size_t numCmps = Clusterify(Cases, SI); + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << '\n'); + numCmps = 0; + + // Get the Value to be switched on and default basic blocks, which will be + // inserted into CaseBlock records, representing basic blocks in the binary + // search tree. 
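+  // The worklist loop below tries the lowering strategies in order: bit
+  // tests, a short run of explicit compares, a jump table, and finally a
+  // binary-tree split that pushes two subranges back onto the worklist.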
+ const Value *SV = SI.getOperand(0); + + // Push the initial CaseRec onto the worklist + CaseRecVector WorkList; + WorkList.push_back(CaseRec(SwitchMBB,0,0, + CaseRange(Cases.begin(),Cases.end()))); + + while (!WorkList.empty()) { + // Grab a record representing a case range to process off the worklist + CaseRec CR = WorkList.back(); + WorkList.pop_back(); + + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) + continue; + + // If the range has few cases (two or less) emit a series of specific + // tests. + if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) + continue; + + // If the switch has more than 5 blocks, and at least 40% dense, and the + // target supports indirect branches, then emit a jump table rather than + // lowering the switch to a binary tree of conditional branches. + if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) + continue; + + // Emit binary tree. We need to pick a pivot, and push left and right ranges + // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. + handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); + } +} + +void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; + + // Update machine-CFG edges with unique successors. + SmallVector<BasicBlock*, 32> succs; + succs.reserve(I.getNumSuccessors()); + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) + succs.push_back(I.getSuccessor(i)); + array_pod_sort(succs.begin(), succs.end()); + succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); + for (unsigned i = 0, e = succs.size(); i != e; ++i) + IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); +} + +void SelectionDAGBuilder::visitFSub(const User &I) { + // -0.0 - X --> fneg + const Type *Ty = I.getType(); + if (Ty->isVectorTy()) { + if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + const Type *ElTy = DestTy->getElementType(); + unsigned VL = DestTy->getNumElements(); + std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + } + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + + visitBinary(I, ISD::FSUB); +} + +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + if (!I.getType()->isVectorTy() && + Op2.getValueType() != TLI.getShiftAmountTy()) { + // If the operand is smaller than the shift count type, promote it. 
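+    // For example (hypothetical target), an i8 shift amount would be
+    // ANY_EXTENDed to an i32 shift-amount type here, while an i64 amount
+    // would be truncated, since i32 can represent every legal shift count.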
+ EVT PTy = TLI.getPointerTy(); + EVT STy = TLI.getShiftAmountTy(); + if (STy.bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (STy.getSizeInBits() >= + Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // Otherwise we'll need to temporarily settle for some other + // convenient type; type legalization will make adjustments as + // needed. + else if (PTy.bitsLT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + else if (PTy.bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + } + + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGBuilder::visitICmp(const User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); +} + +void SelectionDAGBuilder::visitFCmp(const User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGBuilder::visitSelect(const User &I) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue TrueVal = getValue(I.getOperand(1)); + SDValue FalseVal = getValue(I.getOperand(2)); + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), + TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), + Cond, + SDValue(TrueVal.getNode(), + TrueVal.getResNo() + i), + SDValue(FalseVal.getNode(), + FalseVal.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + +void SelectionDAGBuilder::visitTrunc(const User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitZExt(const User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. 
So, nothing much to do.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. So, nothing
+  // much to do.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+  // FPTrunc is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+                           DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I) {
+  // FPExt is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+  // FPToUI is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+  // FPToSI is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+  // UIToFP is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I) {
+  // SIToFP is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that source and destination are the same size so this
+  // is either a BIT_CONVERT or a no-op.
+  if (DestVT != N.getValueType())
+    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                             DestVT, N)); // convert types.
+  else
+    setValue(&I, N);                      // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InVal = getValue(I.getOperand(1));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(2)));
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Returns true if the mask is a sequential
+// mask starting from SIndx and increasing to the element length (undefs are
+// allowed).
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+  unsigned MaskNumElts = Mask.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i)
+    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+      return false;
+  return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+  SmallVector<int, 8> Mask;
+  SDValue Src1 = getValue(I.getOperand(0));
+  SDValue Src2 = getValue(I.getOperand(1));
+
+  // Convert the ConstantVector mask operand into an array of ints, with -1
+  // representing undef values.
+  SmallVector<Constant*, 8> MaskElts;
+  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+  unsigned MaskNumElts = MaskElts.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (isa<UndefValue>(MaskElts[i]))
+      Mask.push_back(-1);
+    else
+      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+  }
+
+  EVT VT = TLI.getValueType(I.getType());
+  EVT SrcVT = Src1.getValueType();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+  if (SrcNumElts == MaskNumElts) {
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
+    return;
+  }
+
+  // Normalize the shuffle vector since mask and vector length don't match.
+  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // The mask is longer than the source vectors, and its length is a
+    // multiple of the source vector length.  We can use concat_vectors to
+    // make the mask and vector lengths match.
+    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+      // The shuffle is concatenating two vectors together.
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                               VT, Src1, Src2));
+      return;
+    }
+
+    // Pad both vectors with undefs to make them the same length as the mask.
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust mask for new input vector length.
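+    // For instance, with SrcNumElts == 2 and MaskNumElts == 4, a mask
+    // index of 2 (element 0 of Src2) becomes 2 + 4 - 2 == 4, the first
+    // element of Src2 in its new concatenated position.  (Illustrative
+    // sizes only.)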
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+    }
+
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle.  The analysis is done by calculating
+    // the range of elements the mask accesses on both vectors.
+    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check if the access is smaller than the vector size and whether we can
+    // find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Cannot
+                                 // extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (int Input=0; Input < 2; ++Input) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+        // Fits within range but we should see if we can find a good
+        // start index that is a multiple of the mask length.
+        if (MaxRange[Input] < (int)MaskNumElts) {
+          RangeUse[Input] = 1; // Extract from beginning of the vector
+          StartIdx[Input] = 0;
+        } else {
+          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+              StartIdx[Input] + MaskNumElts < SrcNumElts)
+            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+        }
+      }
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+      return;
+    }
+    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+      // Extract appropriate subvector and generate a vector shuffle.
+      for (int Input=0; Input < 2; ++Input) {
+        SDValue &Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0)
+          Src = DAG.getUNDEF(VT);
+        else
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+      }
+
+      // Calculate new mask.
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+      }
+
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with extract and build vector.
+ EVT EltVT = VT.getVectorElementType(); + EVT PtrVT = TLI.getPointerTy(); + SmallVector<SDValue,8> Ops; + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + } else { + int Idx = Mask[i]; + SDValue Res; + + if (Idx < (int)SrcNumElts) + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src1, DAG.getConstant(Idx, PtrVT)); + else + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src2, + DAG.getConstant(Idx - SrcNumElts, PtrVT)); + + Ops.push_back(Res); + } + } + + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size())); +} + +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Value *Op1 = I.getOperand(1); + const Type *AggTy = I.getType(); + const Type *ValTy = Op1->getType(); + bool IntoUndef = isa<UndefValue>(Op0); + bool FromUndef = isa<UndefValue>(Op1); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, AggTy, AggValueVTs); + SmallVector<EVT, 4> ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumAggValues = AggValueVTs.size(); + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumAggValues); + + SDValue Agg = getValue(Op0); + SDValue Val = getValue(Op1); + unsigned i = 0; + // Copy the beginning value(s) from the original aggregate. + for (; i != LinearIndex; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + // Copy values from the inserted value(s). + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + // Copy remaining value(s) from the original aggregate. + for (; i != NumAggValues; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&AggValueVTs[0], NumAggValues), + &Values[0], NumAggValues)); +} + +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Type *AggTy = Op0->getType(); + const Type *ValTy = I.getType(); + bool OutOfUndef = isa<UndefValue>(Op0); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector<EVT, 4> ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumValValues); + + SDValue Agg = getValue(Op0); + // Copy out the selected value(s). + for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) + Values[i - LinearIndex] = + OutOfUndef ? 
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); +} + +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { + SDValue N = getValue(I.getOperand(0)); + const Type *Ty = I.getOperand(0)->getType(); + + for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + const Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + DAG.getIntPtrConstant(Offset)); + } + + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) continue; + uint64_t Offs = + TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + SDValue OffsVal; + EVT PTy = TLI.getPointerTy(); + unsigned PtrBits = PTy.getSizeInBits(); + if (PtrBits < 64) + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(Offs, MVT::i64)); + else + OffsVal = DAG.getIntPtrConstant(Offs); + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + OffsVal); + continue; + } + + // N = N + Idx * ElementSize; + APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), + TD->getTypeAllocSize(Ty)); + SDValue IdxN = getValue(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. + if (ElementSize != 1) { + if (ElementSize.isPowerOf2()) { + unsigned Amt = ElementSize.logBase2(); + IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + N.getValueType(), IdxN, + DAG.getConstant(Amt, TLI.getPointerTy())); + } else { + SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); + IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + N.getValueType(), IdxN, Scale); + } + } + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N.getValueType(), N, IdxN); + } + } + + setValue(&I, N); +} + +void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { + // If this is a fixed sized alloca in the entry block of the function, + // allocate it statically on the stack. + if (FuncInfo.StaticAllocaMap.count(&I)) + return; // getValue will auto-populate this. + + const Type *Ty = I.getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + I.getAlignment()); + + SDValue AllocSize = getValue(I.getArraySize()); + + EVT IntPtr = TLI.getPointerTy(); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize, + DAG.getConstant(TySize, IntPtr)); + + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 
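+  // As a concrete illustration of the rounding below (not tied to any
+  // target): with a 16-byte stack alignment, a 20-byte request becomes
+  // (20 + 15) & ~15 == 32.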
+  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
+  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(StackAlign-1));
+
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+                            VTs, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+  const Value *SV = I.getOperand(0);
+  SDValue Ptr = getValue(SV);
+
+  const Type *Ty = I.getType();
+
+  bool isVolatile = I.isVolatile();
+  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+  unsigned Alignment = I.getAlignment();
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  SDValue Root;
+  bool ConstantMemory = false;
+  if (I.isVolatile())
+    // Serialize volatile loads with other side effects.
+    Root = getRoot();
+  else if (AA->pointsToConstantMemory(SV)) {
+    // Do not serialize (non-volatile) loads of constant memory with anything.
+    Root = DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = DAG.getRoot();
+  }
+
+  SmallVector<SDValue, 4> Values(NumValues);
+  SmallVector<SDValue, 4> Chains(NumValues);
+  EVT PtrVT = Ptr.getValueType();
+  for (unsigned i = 0; i != NumValues; ++i) {
+    SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                            PtrVT, Ptr,
+                            DAG.getConstant(Offsets[i], PtrVT));
+    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+                            A, SV, Offsets[i], isVolatile,
+                            isNonTemporal, Alignment);
+
+    Values[i] = L;
+    Chains[i] = L.getValue(1);
+  }
+
+  if (!ConstantMemory) {
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                MVT::Other, &Chains[0], NumValues);
+    if (isVolatile)
+      DAG.setRoot(Chain);
+    else
+      PendingLoads.push_back(Chain);
+  }
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+  const Value *SrcV = I.getOperand(0);
+  const Value *PtrV = I.getOperand(1);
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  // Get the lowered operands.  Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have values in the map.
+  SDValue Src = getValue(SrcV);
+  SDValue Ptr = getValue(PtrV);
+
+  SDValue Root = getRoot();
+  SmallVector<SDValue, 4> Chains(NumValues);
+  EVT PtrVT = Ptr.getValueType();
+  bool isVolatile = I.isVolatile();
+  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+  unsigned Alignment = I.getAlignment();
+
+  for (unsigned i = 0; i != NumValues; ++i) {
+    SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+                              DAG.getConstant(Offsets[i], PtrVT));
+    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
+                             SDValue(Src.getNode(), Src.getResNo() + i),
+                             Add, PtrV, Offsets[i], isVolatile,
+                             isNonTemporal, Alignment);
+  }
+
+  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                          MVT::Other, &Chains[0], NumValues));
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  bool HasChain = !I.doesNotAccessMemory();
+  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemIntrinsic.
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+  if (!IsTgtIntrinsic)
+    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
+  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+    SDValue Op = getValue(I.getArgOperand(i));
+    assert(TLI.isTypeLegal(Op.getValueType()) &&
+           "Intrinsic uses a non-legal type?");
+    Ops.push_back(Op);
+  }
+
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+           "Intrinsic uses a non-legal type?");
+  }
+#endif // NDEBUG
+
+  if (HasChain)
+    ValueVTs.push_back(MVT::Other);
+
+  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+  // Create the node.
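+  // If HasChain, the node's last result is the output chain; it is peeled
+  // off below and either queued in PendingLoads or made the new root.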
+  SDValue Result;
+  if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+                                     VTs, &Ops[0], Ops.size(),
+                                     Info.memVT, Info.ptrVal, Info.offset,
+                                     Info.align, Info.vol,
+                                     Info.readMem, Info.writeMem);
+  } else if (!HasChain) {
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else if (!I.getType()->isVoidTy()) {
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else {
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  }
+
+  if (HasChain) {
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+
+  if (!I.getType()->isVoidTy()) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      EVT VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+    }
+
+    setValue(&I, Result);
+  }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x007fffff, MVT::i32));
+  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+                           DAG.getConstant(0x3f800000, MVT::i32));
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl) {
+  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x7f800000, MVT::i32));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getConstant(23, TLI.getPointerTy()));
+  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+                           DAG.getConstant(127, MVT::i32));
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// Inlined utility function to implement binary input atomic intrinsics for
+/// visitIntrinsicCall: I is the call instruction; Op is the associated
+/// NodeType for I.
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
+                                           ISD::NodeType Op) {
+  SDValue Root = getRoot();
+  SDValue L =
+    DAG.getAtomic(Op, getCurDebugLoc(),
+                  getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+                  Root,
+                  getValue(I.getArgOperand(0)),
+                  getValue(I.getArgOperand(1)),
+                  I.getArgOperand(0));
+  setValue(&I, L);
+  DAG.setRoot(L.getValue(1));
+  return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
+  SDValue Op1 = getValue(I.getArgOperand(0));
+  SDValue Op2 = getValue(I.getArgOperand(1));
+
+  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+  setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+  return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
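+/// The limited-precision expansion uses exp(X) = 2^(X * log2(e)): the
+/// integer part of the scaled input is shifted into the float's exponent
+/// field, and 2^(fractional part) is approximated by a minimax polynomial.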
+void +SelectionDAGBuilder::visitExp(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(0)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + + // Add the exponent into the result in integer domain. + SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + + // Add the exponent into the result in integer domain. 
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::i32, t13); + + // Add the exponent into the result in integer domain. + SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); + } + + setValue(&I, result); +} + +/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGBuilder::visitLog(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. 
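+    // This implements log(2^E * M) = E*log(2) + log(M), where M is the
+    // significand in [1,2) approximated by the polynomials below.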
+ SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); + } + + setValue(&I, result); +} + +/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. 
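+/// Here the decomposition is log2(2^E * M) = E + log2(M), so the exponent
+/// needs no scaling and only log2 of the significand is approximated.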
+void +SelectionDAGBuilder::visitLog2(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad)); + SDValue t10 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG2, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); + } + + setValue(&I, result); +} + +/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGBuilder::visitLog10(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + 
getF32Constant(DAG, 0x3f88d192)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fc4316c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG10, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); + } + + setValue(&I, result); +} + +/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGBuilder::visitExp2(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(0)); + + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f 
*x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP2, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); + } + + setValue(&I, result); +} + +/// visitPow - Lower a pow intrinsic. Handles the special sequences for +/// limited-precision mode with x == 10.0f. +void +SelectionDAGBuilder::visitPow(const CallInst &I) { + SDValue result; + const Value *Val = I.getArgOperand(0); + DebugLoc dl = getCurDebugLoc(); + bool IsExp10 = false; + + if (getValue(Val).getValueType() == MVT::f32 && + getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + APFloat Ten(10.0f); + IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); + } + } + } + + if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(1)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // IntegerPartOfX = (int32_t)(x * LOG2OF10); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x40549a78)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // twoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, 
MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FPOW, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + } + + setValue(&I, result); +} + + +/// ExpandPowI - Expand a llvm.powi intrinsic. +static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, + SelectionDAG &DAG) { + // If RHS is a constant, we can expand this out to a multiplication tree, + // otherwise we end up lowering to a call to __powidf2 (for example). When + // optimizing for size, we only want to do this if the expansion would produce + // a small number of multiplies, otherwise we do the full expansion. 
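+  // As a minimal standalone sketch of the same square-and-multiply scheme
+  // (illustration only; PowiSketch is a hypothetical helper, not part of
+  // this file), over a plain double:
+  //
+  //   double PowiSketch(double X, unsigned N) {
+  //     double Res = 1.0, Cur = X;
+  //     while (N) {
+  //       if (N & 1) Res *= Cur;  // fold in the squares whose exponent bit is set
+  //       Cur *= Cur;             // X, X^2, X^4, X^8, ...
+  //       N >>= 1;
+  //     }
+  //     return Res;
+  //   }
+  //
+  // For example, N = 11 = 0b1011 yields Res = X * X^2 * X^8 = X^11.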
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+    // Get the exponent as a positive value.
+    unsigned Val = RHSC->getSExtValue();
+    if ((int)Val < 0) Val = -Val;
+
+    // powi(x, 0) -> 1.0
+    if (Val == 0)
+      return DAG.getConstantFP(1.0, LHS.getValueType());
+
+    const Function *F = DAG.getMachineFunction().getFunction();
+    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+        // If optimizing for size, don't insert too many multiplies.  This
+        // inserts up to 5 multiplies.
+        CountPopulation_32(Val)+Log2_32(Val) < 7) {
+      // We use the simple binary decomposition method to generate the multiply
+      // sequence.  There are more optimal ways to do this (for example,
+      // powi(x,15) generates one more multiply than it should), but this has
+      // the benefit of being both really simple and much better than a libcall.
+      SDValue Res;  // Logically starts equal to 1.0
+      SDValue CurSquare = LHS;
+      while (Val) {
+        if (Val & 1) {
+          if (Res.getNode())
+            Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
+          else
+            Res = CurSquare;  // 1.0*CurSquare.
+        }
+
+        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+                                CurSquare, CurSquare);
+        Val >>= 1;
+      }
+
+      // If the original was negative, invert the result, producing 1/(x*x*x).
+      if (RHSC->getSExtValue() < 0)
+        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+      return Res;
+    }
+  }
+
+  // Otherwise, expand to a libcall.
+  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it
+/// now.  At the end of instruction selection, such instructions are inserted
+/// into the entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+                                              int64_t Offset,
+                                              const SDValue &N) {
+  const Argument *Arg = dyn_cast<Argument>(V);
+  if (!Arg)
+    return false;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  // Ignore inlined function arguments here.
+  DIVariable DV(Variable);
+  if (DV.isInlinedFnArgument(MF.getFunction()))
+    return false;
+
+  MachineBasicBlock *MBB = FuncInfo.MBB;
+  if (MBB != &MF.front())
+    return false;
+
+  unsigned Reg = 0;
+  if (Arg->hasByValAttr()) {
+    // Byval arguments' frame index is recorded during argument lowering.
+    // Use this info directly.
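+    // With the frame register and byval frame index in hand, the DBG_VALUE
+    // built at the end of this function describes the variable roughly as
+    //
+    //   DBG_VALUE <frame-reg>, <byval-frame-index>, <variable metadata>
+    //
+    // (a sketch of the operand order used below; printing is target-specific).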
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + Reg = TRI->getFrameRegister(MF); + Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + } + + if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + } + + if (!Reg) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI == FuncInfo.ValueMap.end()) + return false; + Reg = VMI->second; + } + + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); + FuncInfo.ArgDbgValues.push_back(&*MIB); + return true; +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + DebugLoc dl = getCurDebugLoc(); + SDValue Res; + + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. + visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::setjmp: + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + case Intrinsic::longjmp: + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + case Intrinsic::memcpy: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() + < 256 && + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + return 0; + } + case Intrinsic::memset: { + // Assert for address < 256 since we support only user defined address + // spaces. 
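+    // The operand accesses below assume the five-operand form of the
+    // intrinsic, e.g. (sketch for an i8* destination and i64 length):
+    //
+    //   declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 %len,
+    //                                      i32 %align, i1 %isvolatile)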
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + I.getArgOperand(0), 0)); + return 0; + } + case Intrinsic::memmove: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() + < 256 && + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + + // If the source and destination are known to not be aliases, we can + // lower memmove as memcpy. + uint64_t Size = -1ULL; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) + Size = C->getZExtValue(); + if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == + AliasAnalysis::NoAlias) { + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + false, I.getArgOperand(0), 0, + I.getArgOperand(1), 0)); + return 0; + } + + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + return 0; + } + case Intrinsic::dbg_declare: { + const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + MDNode *Variable = DI.getVariable(); + const Value *Address = DI.getAddress(); + if (!Address || !DIVariable(DI.getVariable()).Verify()) + return 0; + + // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder + // but do not always have a corresponding SDNode built. The SDNodeOrder + // absolute, but not relative, values are different depending on whether + // debug info exists. + ++SDNodeOrder; + + // Check if address has undef value. + if (isa<UndefValue>(Address) || + (Address->use_empty() && !isa<Argument>(Address))) { + SDDbgValue*SDV = + DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + + SDValue &N = NodeMap[Address]; + if (!N.getNode() && isa<Argument>(Address)) + // Check unused arguments map. + N = UnusedArgNodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + // Parameters are handled specially. + bool isParameter = + DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); + + if (isParameter && !AI) { + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FINode) + // Byval parameter. We have a frame index at this point. + SDV = DAG.getDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. This might be a + // parameter of a callee function that got inlined, for example. + return 0; + } else if (AI) + SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. 
+        return 0;
+      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+    } else {
+      // If Address is an argument then try to emit its dbg value using
+      // virtual register info from the FuncInfo.ValueMap.  Otherwise add undef
+      // to help track missing debug info.
+      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+        SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+                              0, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, 0, false);
+      }
+    }
+    return 0;
+  }
+  case Intrinsic::dbg_value: {
+    const DbgValueInst &DI = cast<DbgValueInst>(I);
+    if (!DIVariable(DI.getVariable()).Verify())
+      return 0;
+
+    MDNode *Variable = DI.getVariable();
+    uint64_t Offset = DI.getOffset();
+    const Value *V = DI.getValue();
+    if (!V)
+      return 0;
+
+    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
+    // but do not always have a corresponding SDNode built.  The SDNodeOrder
+    // absolute, but not relative, values are different depending on whether
+    // debug info exists.
+    ++SDNodeOrder;
+    SDDbgValue *SDV;
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+      SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, 0, false);
+    } else {
+      // Do not use getValue() in here; we don't want to generate code at
+      // this point if it hasn't been done yet.
+      SDValue N = NodeMap[V];
+      if (!N.getNode() && isa<Argument>(V))
+        // Check unused arguments map.
+        N = UnusedArgNodeMap[V];
+      if (N.getNode()) {
+        if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+          SDV = DAG.getDbgValue(Variable, N.getNode(),
+                                N.getResNo(), Offset, dl, SDNodeOrder);
+          DAG.AddDbgValue(SDV, N.getNode(), false);
+        }
+      } else if (isa<PHINode>(V) && !V->use_empty()) {
+        // Do not call getValue(V) yet, as we don't want to generate code.
+        // Remember it for later.
+        DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+        DanglingDebugInfoMap[V] = DDI;
+      } else {
+        // We may expand this to cover more cases.  One case where we have no
+        // data available is an unreferenced parameter; we need this fallback.
+        SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
+                              Offset, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, 0, false);
+      }
+    }
+
+    // Build a debug info table entry.
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+      V = BCI->getOperand(0);
+    const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+    // Don't handle byval struct arguments or VLAs, for example.
+    if (!AI)
+      return 0;
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI == FuncInfo.StaticAllocaMap.end())
+      return 0; // VLAs.
+    int FI = SI->second;
+
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+      MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+    return 0;
+  }
+  case Intrinsic::eh_exception: {
+    // Insert the EXCEPTIONADDR instruction.
+    assert(FuncInfo.MBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[1];
+    Ops[0] = DAG.getRoot();
+    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+    setValue(&I, Op);
+    DAG.setRoot(Op.getValue(1));
+    return 0;
+  }
+
+  case Intrinsic::eh_selector: {
+    MachineBasicBlock *CallMBB = FuncInfo.MBB;
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    if (CallMBB->isLandingPad())
+      AddCatchInfo(I, &MMI, CallMBB);
+    else {
+#ifndef NDEBUG
+      FuncInfo.CatchInfoLost.insert(&I);
+#endif
+      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); + } + + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getArgOperand(0)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + DAG.setRoot(Op.getValue(1)); + setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); + return 0; + } + + case Intrinsic::eh_typeid_for: { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + Res = DAG.getConstant(TypeID, MVT::i32); + setValue(&I, Res); + return 0; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; + case Intrinsic::eh_unwind_init: + DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); + return 0; + case Intrinsic::eh_dwarf_cfa: { + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, + TLI.getPointerTy()); + SDValue Offset = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + TLI.getPointerTy()), + CfaArg); + SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, + TLI.getPointerTy(), + DAG.getConstant(0, TLI.getPointerTy())); + setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), + FA, Offset)); + return 0; + } + case Intrinsic::eh_sjlj_callsite: { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); + assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); + + MMI.setCurrentCallSite(CI->getZExtValue()); + return 0; + } + case Intrinsic::eh_sjlj_setjmp: { + setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), + getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::eh_sjlj_longjmp: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, + getRoot(), + getValue(I.getArgOperand(0)))); + return 0; + } + + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + EVT DestVT = TLI.getValueType(I.getType()); + const Value *Op1 = I.getArgOperand(0); + Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + Code); + setValue(&I, Res); + return 0; + } + case Intrinsic::sqrt: + setValue(&I, 
DAG.getNode(ISD::FSQRT, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::powi: + setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); + return 0; + case Intrinsic::sin: + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::cos: + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::log: + visitLog(I); + return 0; + case Intrinsic::log2: + visitLog2(I); + return 0; + case Intrinsic::log10: + visitLog10(I); + return 0; + case Intrinsic::exp: + visitExp(I); + return 0; + case Intrinsic::exp2: + visitExp2(I); + return 0; + case Intrinsic::pow: + visitPow(I); + return 0; + case Intrinsic::convert_to_fp16: + setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, + MVT::i16, getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::convert_from_fp16: + setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, + MVT::f32, getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::READCYCLECOUNTER, dl, + DAG.getVTList(MVT::i64, MVT::Other), + &Op, 1); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return 0; + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getArgOperand(0)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); + return 0; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getArgOperand(0)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); + return 0; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getArgOperand(0)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); + return 0; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::STACKSAVE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + Res = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); + return 0; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + EVT PtrTy = TLI.getPointerTy(); + + SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. + Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + PseudoSourceValue::getFixedStack(FI), + 0, true, false, 0); + setValue(&I, Res); + DAG.setRoot(Res); + return 0; + } + case Intrinsic::objectsize: { + // If we don't know by now, we're never going to know. 
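+    // The intrinsic has the form (sketch for the i32 variant):
+    //
+    //   declare i32 @llvm.objectsize.i32(i8* %ptr, i1 %min)
+    //
+    // A false 'min' argument asks for the maximum remaining size, so an
+    // unknown size lowers to -1; a true one asks for the minimum, so an
+    // unknown size lowers to 0.  That is the pair of constants produced below.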
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); + + assert(CI && "Non-constant type in __builtin_object_size?"); + + SDValue Arg = getValue(I.getCalledValue()); + EVT Ty = Arg.getValueType(); + + if (CI->isZero()) + Res = DAG.getConstant(-1ULL, Ty); + else + Res = DAG.getConstant(0, Ty); + + setValue(&I, Res); + return 0; + } + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); + Ops[5] = DAG.getSrcValue(F); + + Res = DAG.getNode(ISD::TRAMPOLINE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), + Ops, 6); + + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return 0; + } + case Intrinsic::gcroot: + if (GFI) { + const Value *Alloca = I.getArgOperand(0); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return 0; + case Intrinsic::gcread: + case Intrinsic::gcwrite: + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); + return 0; + case Intrinsic::flt_rounds: + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + return 0; + case Intrinsic::trap: + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + case Intrinsic::uadd_with_overflow: + return implVisitAluOverflow(I, ISD::UADDO); + case Intrinsic::sadd_with_overflow: + return implVisitAluOverflow(I, ISD::SADDO); + case Intrinsic::usub_with_overflow: + return implVisitAluOverflow(I, ISD::USUBO); + case Intrinsic::ssub_with_overflow: + return implVisitAluOverflow(I, ISD::SSUBO); + case Intrinsic::umul_with_overflow: + return implVisitAluOverflow(I, ISD::UMULO); + case Intrinsic::smul_with_overflow: + return implVisitAluOverflow(I, ISD::SMULO); + + case Intrinsic::prefetch: { + SDValue Ops[4]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + return 0; + } + + case Intrinsic::memory_barrier: { + SDValue Ops[6]; + Ops[0] = getRoot(); + for (int x = 1; x < 6; ++x) + Ops[x] = getValue(I.getArgOperand(x - 1)); + + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); + return 0; + } + case Intrinsic::atomic_cmp_swap: { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), + Root, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + I.getArgOperand(0)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_load_add: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); + case Intrinsic::atomic_load_sub: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); + case Intrinsic::atomic_load_or: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); + case Intrinsic::atomic_load_xor: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); + case Intrinsic::atomic_load_and: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); + case Intrinsic::atomic_load_nand: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); 
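+  // Each atomic_load_* case funnels into implVisitBinaryAtomic (defined
+  // elsewhere in this file), which - in outline, much like the cmp_swap case
+  // above - builds the two-operand atomic node and re-roots the chain:
+  //
+  //   SDValue L = DAG.getAtomic(Op, dl, VT, getRoot(), Ptr, Val, SrcValue);
+  //   setValue(&I, L);             // the old value loaded from memory
+  //   DAG.setRoot(L.getValue(1));  // the output chain
+  //
+  // (a sketch; the helper's exact signature may differ).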
+ case Intrinsic::atomic_load_max: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); + case Intrinsic::atomic_load_min: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); + case Intrinsic::atomic_load_umin: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); + case Intrinsic::atomic_load_umax: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); + case Intrinsic::atomic_swap: + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return 0; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + return 0; + } +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + const Type *RetTy = FTy->getReturnType(); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + SmallVector<uint64_t, 4> Offsets; + GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + Outs, TLI, &Offsets); + + bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), + FTy->isVarArg(), Outs, FTy->getContext()); + + SDValue DemoteStackSlot; + + if (!CanLowerReturn) { + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + FTy->getReturnType()); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + FTy->getReturnType()); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); + + DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.Alignment = Align; + Args.push_back(Entry); + RetTy = Type::getVoidTy(FTy->getContext()); + } + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + SDValue ArgNode = getValue(*i); + Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); + } + + if (LandingPad) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI.getContext().CreateTempSymbol(); + + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. 
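+    // (The call-site number read here is the one published by the
+    // Intrinsic::eh_sjlj_callsite case above via MMI.setCurrentCallSite.)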
+ unsigned CallSiteIndex = MMI.getCurrentCallSite(); + if (CallSiteIndex) { + MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + // Now that the call site is handled, stop tracking it. + MMI.setCurrentCallSite(0); + } + + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI.LowerCallTo. + if (isTailCall && + !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) + isTailCall = false; + + // If there's a possibility that fast-isel has already selected some amount + // of the current basic block, don't emit a tail call. + if (isTailCall && EnableFastISel) + isTailCall = false; + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), RetTy, + CS.paramHasAttr(0, Attribute::SExt), + CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), + CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), + CS.getCallingConv(), + isTailCall, + !CS.getInstruction()->use_empty(), + Callee, Args, DAG, getCurDebugLoc()); + assert((isTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + if (Result.first.getNode()) { + setValue(CS.getInstruction(), Result.first); + } else if (!CanLowerReturn && Result.second.getNode()) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + + ComputeValueVTs(TLI, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + unsigned NumValues = Outs.size(); + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, + DemoteStackSlot, + DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, + Add, NULL, Offsets[i], false, false, 1); + Values[i] = L; + Chains[i] = L.getValue(1); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + PendingLoads.push_back(Chain); + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + SmallVector<EVT, 4> RetTys; + RetTy = FTy->getReturnType(); + ComputeValueVTs(TLI, RetTy, RetTys); + ISD::NodeType AssertOp = ISD::DELETED_NODE; + SmallVector<SDValue, 4> ReturnValues; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); + + SDValue ReturnValue = + getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, + RegisterVT, VT, AssertOp); + ReturnValues.push_back(ReturnValue); + CurReg += NumRegs; + } + + setValue(CS.getInstruction(), + DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size())); + + } + + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. 
+ if (Result.second.getNode()) + DAG.setRoot(Result.second); + else + HasTailCall = true; + + if (LandingPad) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. + MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + const Type *LoadTy, + SelectionDAGBuilder &Builder) { + + // Check to see if this load can be trivially constant folded, e.g. if the + // input is from a string literal. + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + // Cast pointer to the type we really want to load. + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), + PointerType::getUnqual(LoadTy)); + + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), + Builder.TD)) + return Builder.getValue(LoadCst); + } + + // Otherwise, we have to emit the load. If the pointer is to unfoldable but + // still constant memory, the input chain can be the entry node. + SDValue Root; + bool ConstantMemory = false; + + // Do not serialize (non-volatile) loads of constant memory with anything. + if (Builder.AA->pointsToConstantMemory(PtrVal)) { + Root = Builder.DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = Builder.DAG.getRoot(); + } + + SDValue Ptr = Builder.getValue(PtrVal); + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + false /*volatile*/, + false /*nontemporal*/, 1 /* align=1 */); + + if (!ConstantMemory) + Builder.PendingLoads.push_back(LoadVal.getValue(1)); + return LoadVal; +} + + +/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. 
int memcmp(void*,void*,size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); + if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + + // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 + // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 + if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + bool ActuallyDoIt = true; + MVT LoadVT; + const Type *LoadTy; + switch (Size->getZExtValue()) { + default: + LoadVT = MVT::Other; + LoadTy = 0; + ActuallyDoIt = false; + break; + case 2: + LoadVT = MVT::i16; + LoadTy = Type::getInt16Ty(Size->getContext()); + break; + case 4: + LoadVT = MVT::i32; + LoadTy = Type::getInt32Ty(Size->getContext()); + break; + case 8: + LoadVT = MVT::i64; + LoadTy = Type::getInt64Ty(Size->getContext()); + break; + /* + case 16: + LoadVT = MVT::v4i32; + LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = VectorType::get(LoadTy, 4); + break; + */ + } + + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. + + // Require that we can find a legal MVT, and only do this if the target + // supports unaligned loads of that type. Expanding into byte loads would + // bloat the code. + if (ActuallyDoIt && Size->getZExtValue() > 4) { + // TODO: Handle 5 byte compare as 4-byte + 1 byte. + // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. + if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + ActuallyDoIt = false; + } + + if (ActuallyDoIt) { + SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); + SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + + SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + ISD::SETNE); + EVT CallVT = TLI.getValueType(I.getType(), true); + setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + return true; + } + } + + + return false; +} + + +void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa<InlineAsm>(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. + if (!F->hasLocalLinkage() && F->hasName()) { + StringRef Name = F->getName(); + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.getType() == I.getArgOperand(1)->getType()) {
+          SDValue LHS = getValue(I.getArgOperand(0));
+          SDValue RHS = getValue(I.getArgOperand(1));
+          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+                                   LHS.getValueType(), LHS, RHS));
+          return;
+        }
+      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "memcmp") {
+        if (visitMemCmpCall(I))
+          return;
+      }
+    }
+  }
+
+  SDValue Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getCalledValue());
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call.  More detailed checking
+  // is done within LowerCallTo, after more information about the call is
+  // known.
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace llvm {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
+    public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs, + const TargetRegisterInfo &TRI) const { + if (isOutReg) { + for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) + MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); + } + if (isInReg) { + for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) + MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); + } + } + + /// getCallOperandValEVT - Return the EVT of the Value* that this operand + /// corresponds to. If there is no Value* for this operand, it returns + /// MVT::Other. + EVT getCallOperandValEVT(LLVMContext &Context, + const TargetLowering &TLI, + const TargetData *TD) const { + if (CallOperandVal == 0) return MVT::Other; + + if (isa<BasicBlock>(CallOperandVal)) + return TLI.getPointerTy(); + + const llvm::Type *OpTy = CallOperandVal->getType(); + + // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (isIndirect) { + const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpTy = IntegerType::get(Context, BitSize); + break; + } + } + + return TLI.getValueType(OpTy, true); + } + +private: + /// MarkRegAndAliases - Mark the specified register and all aliases in the + /// specified set. + static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); + Regs.insert(Reg); + if (const unsigned *Aliases = TRI.getAliasSet(Reg)) + for (; *Aliases; ++Aliases) + Regs.insert(*Aliases); + } +}; + +} // end llvm namespace. + +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + EVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + EVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). 
This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand.  We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process.  However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation.  This produces generally horrible, but correct, code.
+///
+///   OpInfo describes the operand.
+///   Input and OutputRegs are the set of already allocated physical registers.
+///
+void SelectionDAGBuilder::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  LLVMContext &Context = FuncInfo.Fn->getContext();
+
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If there is an input constraint that matches this, we need to reserve
+    // the input register so no other inputs allocate to it.
+    isInReg = OpInfo.hasMatchingInput();
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is a FP input in an integer register (or vice versa) insert a
+    // bitcast of the input value.  More generally, handle any case where the
+    // input value disagrees with the register class we plan to stick this in.
+    if (OpInfo.Type == InlineAsm::isInput &&
+        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+      // Try to convert to the first EVT that the reg class contains.  If the
+      // types are identical size, use a bitcast to convert (e.g. two differing
+      // vector types).
+      EVT RegVT = *PhysReg.second->vt_begin();
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is a FP value and we want it in integer registers, do a
+        // bitcast to the corresponding integer type.  This turns an f64 value
+        // into i64, which can be passed with two i32 values on a 32-bit
+        // machine.
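+        // For example (illustrative only): on a 32-bit target,
+        //   asm("..." : : "r"(SomeDouble));
+        // reaches this path with ConstraintVT == f64 and an integer register
+        // class, and the conversion below retypes the operand as i64 before
+        // registers are assigned.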
+        RegVT = EVT::getIntegerVT(Context,
+                                  OpInfo.ConstraintVT.getSizeInBits());
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      }
+    }
+
+    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+  }
+
+  EVT RegVT;
+  EVT ValueVT = OpInfo.ConstraintVT;
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (unsigned AssignedReg = PhysReg.first) {
+    const TargetRegisterClass *RC = PhysReg.second;
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *RC->vt_begin();
+
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(AssignedReg);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = RC->begin();
+      for (; *I != AssignedReg; ++I)
+        assert(I != RC->end() && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != RC->end() && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  if (const TargetRegisterClass *RC = PhysReg.second) {
+    RegVT = *RC->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
+
+    // Create the appropriate number of virtual registers.
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    for (; NumRegs; --NumRegs)
+      Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    return;
+  }
+
+  // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class.  Allocate NumRegs consecutive, available,
+  // registers from the class.
+  std::vector<unsigned> RegClassRegs
+    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                            OpInfo.ConstraintVT);
+
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+    if (!RC) {  // Couldn't allocate this register.
+      // Reset NumAllocated to make sure we return consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Mark all of the allocated registers used.
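+      // For example, if NumRegs == 2 and the second consecutive register was
+      // found at i == 5, then RegStart == 4 and RegEnd == 6, recording
+      // RegClassRegs[4] and RegClassRegs[5].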
+ for (unsigned i = RegStart; i != RegEnd; ++i) + Regs.push_back(RegClassRegs[i]); + + OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), + OpInfo.ConstraintVT); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); + return; + } + } + + // Otherwise, we couldn't allocate enough registers for this. +} + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + + /// ConstraintOperands - Information about all of the constraints. + std::vector<SDISelAsmOperandInfo> ConstraintOperands; + + std::set<unsigned> OutputRegs, InputRegs; + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + std::vector<InlineAsm::ConstraintInfo> + ConstraintInfos = IA->ParseConstraints(); + + bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); + + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + EVT OpVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpVT = TLI.getValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpVT = TLI.getValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + // If this is an input or an indirect output, process the call argument. + // BasicBlocks are labels, currently appearing only in asm's. + if (OpInfo.CallOperandVal) { + // Strip bitcasts, if any. This mostly comes up for functions. + OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); + + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); + } + + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); + } + + OpInfo.ConstraintVT = OpVT; + } + + // Second pass over the constraints: compute which constraint option to use + // and assign registers to constraints that want a specific physreg. + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. 
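+    // A matching constraint ties an input operand to an output operand's
+    // register, as in the (illustrative) GCC-style asm
+    //
+    //   asm("foo %0" : "=r"(Out) : "0"(In));
+    //
+    // where the "0" input must be placed in whatever register the "=r"
+    // output was assigned.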
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          report_fatal_error("Unsupported asm: input constraint"
+                             " with a matching output constraint of"
+                             " incompatible type!");
+        }
+        Input.ConstraintVT = OpInfo.ConstraintVT;
+      }
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what we
+    // need to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value.  If we don't have
+      // an indirect input, put it in the constpool if we can, otherwise spill
+      // it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      const Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, getCurDebugLoc(),
+                             OpInfo.CallOperand, StackSlot, NULL, 0,
+                             false, false, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  ConstraintInfos.clear();
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+                                      TLI.getPointerTy()));
+
+  // If we have a !srcloc metadata node associated with it, we want to attach
+  // this to the ultimately generated inline asm machineinstr.  To do this, we
+  // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + + // Remember the AlignStack bit as operand 3. + AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, + MVT::i1)); + + // Loop over all of the inputs, copying the operand values into the + // appropriate registers and processing the output regs. + RegsForValue RetValRegs; + + // IndirectStoresToEmit - The set of stores to emit after the inline asm node. + std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + switch (OpInfo.Type) { + case InlineAsm::isOutput: { + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + // Memory output, or 'other' output (e.g. 'X' constraint). + assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + + // Add information to the INLINEASM node to know about this output. + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, + TLI.getPointerTy())); + AsmNodeOperands.push_back(OpInfo.CallOperand); + break; + } + + // Otherwise, this is a register or register class output. + + // Copy the output from the appropriate register. Find a register that + // we can use. + if (OpInfo.AssignedRegs.Regs.empty()) + report_fatal_error("Couldn't allocate output reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); + + // If this is an indirect operand, store through the pointer after the + // asm. + if (OpInfo.isIndirect) { + IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, + OpInfo.CallOperandVal)); + } else { + // This is the result value of the call. + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + // Concatenate this output onto the outputs list. + RetValRegs.append(OpInfo.AssignedRegs); + } + + // Add information to the INLINEASM node to know that this register is + // set. + OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? + InlineAsm::Kind_RegDefEarlyClobber : + InlineAsm::Kind_RegDef, + false, + 0, + DAG, + AsmNodeOperands); + break; + } + case InlineAsm::isInput: { + SDValue InOperandVal = OpInfo.CallOperand; + + if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + // If this is required to match an output register we have already set, + // just use its register. + unsigned OperandNo = OpInfo.getMatchedOperand(); + + // Scan until we find the definition we already emitted of this operand. + // When we find it, create a RegsForValue operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; + for (; OperandNo; --OperandNo) { + // Advance to the next operand. + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; + } + + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + if (InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { + // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
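+          // (The flag word stores the operand kind in its low three bits and
+          // the register count in bits 3-15, which is exactly what the
+          // expression above decodes.)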
+          if (OpInfo.isIndirect) {
+            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+            LLVMContext &Ctx = *DAG.getContext();
+            Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
+                          " don't know how to handle tied "
+                          "indirect register inputs");
+          }
+
+          RegsForValue MatchedRegs;
+          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+               i != e; ++i)
+            MatchedRegs.Regs.push_back
+              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the input value into
+          // the newly created virtual registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                    Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+                                           true, OpInfo.getMatchedOperand(),
+                                           DAG, AsmNodeOperands);
+          break;
+        }
+
+        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+               "Unexpected number of operands");
+        // Add information to the INLINEASM node to know about this input.
+        // See InlineAsm.h isUseOperandTiedToDef.
+        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+                                                  OpInfo.getMatchedOperand());
+        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+        break;
+      }
+
+      // Treat indirect 'X' constraint as memory.
+      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+          OpInfo.isIndirect)
+        OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        std::vector<SDValue> Ops;
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+                                         Ops, DAG);
+        if (Ops.empty())
+          report_fatal_error("Invalid operand for inline asm constraint '" +
+                             Twine(OpInfo.ConstraintCode) + "'!");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType =
+          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+        break;
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty() || + !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) + report_fatal_error("Couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + Chain, &Flag); + + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, + DAG, AsmNodeOperands); + break; + } + case InlineAsm::isClobber: { + // Add the clobbered value to the operand list, so that the register + // allocator is aware that the physreg got clobbered. + if (!OpInfo.AssignedRegs.Regs.empty()) + OpInfo.AssignedRegs.AddInlineAsmOperands( + InlineAsm::Kind_RegDefEarlyClobber, + false, 0, DAG, + AsmNodeOperands); + break; + } + } + } + + // Finish up input operands. Set the input chain and add the flag last. + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + if (Flag.getNode()) AsmNodeOperands.push_back(Flag); + + Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + DAG.getVTList(MVT::Other, MVT::Flag), + &AsmNodeOperands[0], AsmNodeOperands.size()); + Flag = Chain.getValue(1); + + // If this asm returns a register value, copy the result from that register + // and set it as the value of the call. + if (!RetValRegs.Regs.empty()) { + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + Chain, &Flag); + + // FIXME: Why don't we do this for inline asms with MRVs? + if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { + EVT ResultType = TLI.getValueType(CS.getType()); + + // If any of the results of the inline asm is a vector, it may have the + // wrong width/num elts. This can happen for register classes that can + // contain multiple different value types. The preg or vreg allocated may + // not have the same VT as was expected. Convert it to the right type + // with bit_convert. + if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + ResultType, Val); + + } else if (ResultType != Val.getValueType() && + ResultType.isInteger() && Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result may + // have a wider width than the expected result. Extract the relevant + // portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + } + + assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + } + + setValue(CS.getInstruction(), Val); + // Don't need to use this as a chain in this case. + if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) + return; + } + + std::vector<std::pair<SDValue, const Value *> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. + for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + const Value *Ptr = IndirectStoresToEmit[i].second; + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + Chain, &Flag); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + } + + // Emit the non-flagged stores from the physregs. 
+ SmallVector<SDValue, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { + SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), + StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + StoresToEmit[i].second, 0, + false, false, 0); + OutChains.push_back(Val); + } + + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &OutChains[0], OutChains.size()); + + DAG.setRoot(Chain); +} + +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetData &TD = *TLI.getTargetData(); + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + getRoot(), getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); +} + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, + bool RetSExt, bool RetZExt, bool isVarArg, + bool isInreg, unsigned NumFixedArgs, + CallingConv::ID CallConv, bool isTailCall, + bool isReturnValueUsed, + SDValue Callee, + ArgListTy &Args, SelectionDAG &DAG, + DebugLoc dl) const { + // Handle all of the outgoing arguments. + SmallVector<ISD::OutputArg, 32> Outs; + SmallVector<SDValue, 32> OutVals; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (Args[i].isZExt) + Flags.setZExt(); + if (Args[i].isSExt) + Flags.setSExt(); + if (Args[i].isInReg) + Flags.setInReg(); + if (Args[i].isSRet) + Flags.setSRet(); + if (Args[i].isByVal) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(Args[i].Ty); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this + // info is not there but there are cases it cannot get right. 
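+        // (E.g. an aggregate passed as "%struct.S* byval align 8": when the
+        // front end supplied an explicit alignment, it takes precedence here.)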
+ if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (Args[i].isNest) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + EVT PartVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + + getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, + PartVT, ExtendKind); + + for (unsigned j = 0; j != NumParts; ++j) { + // if it isn't first piece, alignment must be 1 + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + i < NumFixedArgs); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) + MyFlags.Flags.setOrigAlign(1); + + Outs.push_back(MyFlags); + OutVals.push_back(Parts[j]); + } + } + } + + // Handle the incoming return values from the call. + SmallVector<ISD::InputArg, 32> Ins; + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = isReturnValueUsed; + if (RetSExt) + MyFlags.Flags.setSExt(); + if (RetZExt) + MyFlags.Flags.setZExt(); + if (isInreg) + MyFlags.Flags.setInReg(); + Ins.push_back(MyFlags); + } + } + + SmallVector<SDValue, 4> InVals; + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Outs, OutVals, Ins, dl, DAG, InVals); + + // Verify that the target's LowerCall behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!isTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((isTailCall || InVals.size() == Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. + if (isTailCall) { + DAG.setRoot(Chain); + return std::make_pair(SDValue(), SDValue()); + } + + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerCall emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + }); + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (RetSExt) + AssertOp = ISD::AssertSext; + else if (RetZExt) + AssertOp = ISD::AssertZext; + SmallVector<SDValue, 4> ReturnValues; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg], + NumRegs, RegisterVT, VT, + AssertOp)); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. 
We can't create
+  // such a node, so we just return a null return value in that case.  Nothing
+  // will actually look at the value.
+  if (ReturnValues.empty())
+    return std::make_pair(SDValue(), Chain);
+
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+                            DAG.getVTList(&RetTys[0], RetTys.size()),
+                            &ReturnValues[0], ReturnValues.size());
+  return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  llvm_unreachable("LowerOperation not implemented for this target!");
+  return SDValue();
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+  SDValue Op = getNonRegisterValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+  PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
+  // If this is the entry block, emit arguments.
+  const Function &F = *LLVMBB->getParent();
+  SelectionDAG &DAG = SDB->DAG;
+  DebugLoc dl = SDB->getCurDebugLoc();
+  const TargetData *TD = TLI.getTargetData();
+  SmallVector<ISD::InputArg, 16> Ins;
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<ISD::OutputArg, 4> Outs;
+  GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+                Outs, TLI);
+
+  if (!FuncInfo->CanLowerReturn) {
+    // Put in an sret pointer parameter before all the other parameters.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+    // NOTE: Assuming that a pointer will never break down to more than one VT
+    // or one register.
+    ISD::ArgFlagsTy Flags;
+    Flags.setSRet();
+    EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+    ISD::InputArg RetArg(Flags, RegisterVT, true);
+    Ins.push_back(RetArg);
+  }
+
+  // Set up the incoming argument description vector.
+ unsigned Idx = 1; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++Idx) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); + bool isArgValueUsed = !I->use_empty(); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + TD->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(Idx, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(Idx, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(Idx, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(Idx, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(Idx, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); + unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + if (F.getParamAlignment(Idx)) + FrameAlign = F.getParamAlignment(Idx); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(Idx, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.Flags.setOrigAlign(1); + Ins.push_back(MyFlags); + } + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. + assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + DEBUG({ + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + } + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. + unsigned i = 0; + Idx = 1; + if (!FuncInfo->CanLowerReturn) { + // Create a virtual register for the sret pointer, and put in a copy + // from the sret argument into it. 
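+    // (This is the sret-demotion case: a return value that cannot be passed
+    // in registers is instead written through a hidden pointer argument,
+    // e.g. "define void @f(%struct.Big* sret %out)" rather than returning
+    // %struct.Big directly.)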
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+    EVT VT = ValueVTs[0];
+    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+                                        RegVT, VT, AssertOp);
+
+    MachineFunction& MF = SDB->DAG.getMachineFunction();
+    MachineRegisterInfo& RegInfo = MF.getRegInfo();
+    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+    FuncInfo->DemoteRegister = SRetReg;
+    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+                                    SRetReg, ArgValue);
+    DAG.setRoot(NewRoot);
+
+    // i indexes lowered arguments.  Bump it past the hidden sret argument.
+    // Idx indexes LLVM arguments.  Don't touch it.
+    ++i;
+  }
+
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+       ++I, ++Idx) {
+    SmallVector<SDValue, 4> ArgValues;
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+
+    // If this argument is unused then remember its value. It is used to
+    // generate debugging information.
+    if (I->use_empty() && NumValues)
+      SDB->setUnusedArgValue(I, InVals[i]);
+
+    for (unsigned Value = 0; Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+      if (!I->use_empty()) {
+        ISD::NodeType AssertOp = ISD::DELETED_NODE;
+        if (F.paramHasAttr(Idx, Attribute::SExt))
+          AssertOp = ISD::AssertSext;
+        else if (F.paramHasAttr(Idx, Attribute::ZExt))
+          AssertOp = ISD::AssertZext;
+
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+                                             NumParts, PartVT, VT,
+                                             AssertOp));
+      }
+
+      i += NumParts;
+    }
+
+    // Note down frame index for byval arguments.
+    if (I->hasByValAttr() && !ArgValues.empty())
+      if (FrameIndexSDNode *FI =
+          dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+        FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+
+    if (!I->use_empty()) {
+      SDValue Res;
+      if (!ArgValues.empty())
+        Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+                                 SDB->getCurDebugLoc());
+      SDB->setValue(I, Res);
+
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BBs will reference it as.
+      SDB->CopyToExportRegsIfNeeded(I);
+    }
+  }
+
+  assert(i == InVals.size() && "Argument register count mismatch!");
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+/// ensure constants are generated when needed.  Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input.  We cannot just
+/// directly add them, because expansion might result in multiple MBBs for one
+/// BB.  As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+  const TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
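+  // (E.g. a successor beginning with "%p = phi i32 [ 7, %thisblock ], ..."
+  // needs the constant 7 materialized in a virtual register at the end of
+  // this block.)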
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    const BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::const_iterator I = SuccBB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      // Ignore dead PHIs.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo.CreateRegs(C->getType());
+          CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        DenseMap<const Value *, unsigned>::iterator I =
+          FuncInfo.ValueMap.find(PHIOp);
+        if (I != FuncInfo.ValueMap.end())
+          Reg = I->second;
+        else {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo.CreateRegs(PHIOp->getType());
+          CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+  ConstantsOut.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000..5f400e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,537 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+  /// CurDebugLoc - current file + line number.  Changes as we build the DAG.
+  DebugLoc CurDebugLoc;
+
+  DenseMap<const Value*, SDValue> NodeMap;
+
+  /// UnusedArgNodeMap - Maps argument values for unused arguments.  This is
+  /// used to preserve debug information for incoming arguments.
+  DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+  /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+  class DanglingDebugInfo {
+    const DbgValueInst* DI;
+    DebugLoc dl;
+    unsigned SDNodeOrder;
+  public:
+    DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+    DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+      DI(di), dl(DL), SDNodeOrder(SDNO) { }
+    const DbgValueInst* getDI() { return DI; }
+    DebugLoc getdl() { return dl; }
+    unsigned getSDNodeOrder() { return SDNodeOrder; }
+  };
+
+  /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+  /// yet seen the referent.  We defer handling these until we do see it.
+  DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
+  /// them up and then emit token factor nodes when possible.  This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  SmallVector<SDValue, 8> PendingLoads;
+private:
+
+  /// PendingExports - CopyToReg nodes that copy values to virtual registers
+  /// for export to other blocks need to be emitted before any terminator
+  /// instruction, but they have no other ordering requirements.  We bunch them
+  /// up and then emit a single tokenfactor for them just before terminator
+  /// instructions.
+ SmallVector<SDValue, 8> PendingExports; + + /// SDNodeOrder - A unique monotonically increasing number used to order the + /// SDNodes we create. + unsigned SDNodeOrder; + + /// Case - A struct to record the Value for a switch case, and the + /// case's target basic block. + struct Case { + Constant* Low; + Constant* High; + MachineBasicBlock* BB; + + Case() : Low(0), High(0), BB(0) { } + Case(Constant* low, Constant* high, MachineBasicBlock* bb) : + Low(low), High(high), BB(bb) { } + APInt size() const { + const APInt &rHigh = cast<ConstantInt>(High)->getValue(); + const APInt &rLow = cast<ConstantInt>(Low)->getValue(); + return (rHigh - rLow + 1ULL); + } + }; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): + Mask(mask), BB(bb), Bits(bits) { } + }; + + typedef std::vector<Case> CaseVector; + typedef std::vector<CaseBits> CaseBitsVector; + typedef CaseVector::iterator CaseItr; + typedef std::pair<CaseItr, CaseItr> CaseRange; + + /// CaseRec - A struct with ctor used in lowering switches to a binary tree + /// of conditional branches. + struct CaseRec { + CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRange r) : + CaseBB(bb), LT(lt), GE(ge), Range(r) {} + + /// CaseBB - The MBB in which to emit the compare and branch + MachineBasicBlock *CaseBB; + /// LT, GE - If nonzero, we know the current case value must be less-than or + /// greater-than-or-equal-to these Constants. + const Constant *LT; + const Constant *GE; + /// Range - A pair of iterators representing the range of case values to be + /// processed at this point in the binary search tree. + CaseRange Range; + }; + + typedef std::vector<CaseRec> CaseRecVector; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator()(const Case &C1, const Case &C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + + struct CaseBitsCmp { + bool operator()(const CaseBits &C1, const CaseBits &C2) { + return C1.Bits > C2.Bits; + } + }; + + size_t Clusterify(CaseVector &Cases, const SwitchInst &SI); + + /// CaseBlock - This structure is used to communicate between + /// SelectionDAGBuilder and SDISel for the code generation of additional basic + /// blocks needed by multi-case switch statements. + struct CaseBlock { + CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, + const Value *cmpmiddle, + MachineBasicBlock *truebb, MachineBasicBlock *falsebb, + MachineBasicBlock *me) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {} + // CC - the condition code to use for the case block's setcc node + ISD::CondCode CC; + // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. + // Emit by default LHS op RHS. MHS is used for range comparisons: + // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). + const Value *CmpLHS, *CmpMHS, *CmpRHS; + // TrueBB/FalseBB - the block to branch to if the setcc is true/false. 
+    MachineBasicBlock *TrueBB, *FalseBB;
+    // ThisBB - the block into which to emit the code for the setcc and branches
+    MachineBasicBlock *ThisBB;
+  };
+  struct JumpTable {
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+    /// Reg - the virtual register containing the index of the jump table entry
+    /// to jump to.
+    unsigned Reg;
+    /// JTI - the JumpTableIndex for this jump table in the function.
+    unsigned JTI;
+    /// MBB - the MBB into which to emit the code for the indirect jump.
+    MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB.  It is used when updating PHI nodes in successors.
+    MachineBasicBlock *Default;
+  };
+  struct JumpTableHeader {
+    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+                    bool E = false):
+      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+    APInt First;
+    APInt Last;
+    const Value *SValue;
+    MachineBasicBlock *HeaderBB;
+    bool Emitted;
+  };
+  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+  struct BitTestCase {
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+      Mask(M), ThisBB(T), TargetBB(Tr) { }
+    uint64_t Mask;
+    MachineBasicBlock *ThisBB;
+    MachineBasicBlock *TargetBB;
+  };
+
+  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+  struct BitTestBlock {
+    BitTestBlock(APInt F, APInt R, const Value* SV,
+                 unsigned Rg, bool E,
+                 MachineBasicBlock* P, MachineBasicBlock* D,
+                 const BitTestInfo& C):
+      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+      Parent(P), Default(D), Cases(C) { }
+    APInt First;
+    APInt Range;
+    const Value *SValue;
+    unsigned Reg;
+    bool Emitted;
+    MachineBasicBlock *Parent;
+    MachineBasicBlock *Default;
+    BitTestInfo Cases;
+  };
+
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+  AliasAnalysis *AA;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<const Constant *, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  ///
+  CodeGenOpt::Level OptLevel;
+
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  /// HasTailCall - This is set to true if a call in the current
+  /// block has been translated as a tail call. In this case,
+  /// no subsequent DAG nodes should be created.
+ /// + bool HasTailCall; + + LLVMContext *Context; + + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, + CodeGenOpt::Level ol) + : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + HasTailCall(false), Context(dag.getContext()) { + } + + void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + + /// clear - Clear out the current SelectionDAG and the associated + /// state and prepare this SelectionDAGBuilder object to be used + /// for a new block. This doesn't clear out information about + /// additional blocks that are needed to complete switch lowering + /// or PHI node updating; that information is cleared out as it is + /// consumed. + void clear(); + + /// getRoot - Return the current virtual root of the Selection DAG, + /// flushing any PendingLoad items. This must be done before emitting + /// a store or any other node that may need to be ordered after any + /// prior load instructions. + /// + SDValue getRoot(); + + /// getControlRoot - Similar to getRoot, but instead of flushing all the + /// PendingLoad items, flush all the PendingExports items. It is necessary + /// to do this before emitting a terminator instruction. + /// + SDValue getControlRoot(); + + DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + + unsigned getSDNodeOrder() const { return SDNodeOrder; } + + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); + + /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten + /// from how the code appeared in the source. The ordering is used by the + /// scheduler to effectively turn off scheduling. + void AssignOrderingToNode(const SDNode *Node); + + void visit(const Instruction &I); + + void visit(unsigned Opcode, const User &I); + + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, + // generate the debug data structures now that we've seen its definition. + void resolveDanglingDebugInfo(const Value *V, SDValue Val); + SDValue getValue(const Value *V); + SDValue getNonRegisterValue(const Value *V); + SDValue getValueImpl(const Value *V); + + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + + void setUnusedArgValue(const Value *V, SDValue NewN) { + SDValue &N = UnusedArgNodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs); + + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, unsigned Opc); + void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB); + bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); + bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(const Value *V); + void ExportFromCurrentBlock(const Value *V); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, + MachineBasicBlock *LandingPad = NULL); + +private: + // Terminator instructions. 
+ void visitRet(const ReturnInst &I); + void visitBr(const BranchInst &I); + void visitSwitch(const SwitchInst &I); + void visitIndirectBr(const IndirectBrInst &I); + void visitUnreachable(const UnreachableInst &I) { /* noop */ } + + // Helpers for visitSwitch + bool handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); + bool handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); + bool handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); + bool handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); +public: + void visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB); + void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); + void visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB); + void visitJumpTable(JumpTable &JT); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB); + +private: + // These all get lowered before this pass. + void visitInvoke(const InvokeInst &I); + void visitUnwind(const UnwindInst &I); + + void visitBinary(const User &I, unsigned OpCode); + void visitShift(const User &I, unsigned Opcode); + void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } + void visitSub(const User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(const User &I); + void visitMul(const User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } + void visitURem(const User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (const User &I) { visitBinary(I, ISD::AND); } + void visitOr (const User &I) { visitBinary(I, ISD::OR); } + void visitXor (const User &I) { visitBinary(I, ISD::XOR); } + void visitShl (const User &I) { visitShift(I, ISD::SHL); } + void visitLShr(const User &I) { visitShift(I, ISD::SRL); } + void visitAShr(const User &I) { visitShift(I, ISD::SRA); } + void visitICmp(const User &I); + void visitFCmp(const User &I); + // Visit the conversion instructions + void visitTrunc(const User &I); + void visitZExt(const User &I); + void visitSExt(const User &I); + void visitFPTrunc(const User &I); + void visitFPExt(const User &I); + void visitFPToUI(const User &I); + void visitFPToSI(const User &I); + void visitUIToFP(const User &I); + void visitSIToFP(const User &I); + void visitPtrToInt(const User &I); + void visitIntToPtr(const User &I); + void visitBitCast(const User &I); + + void visitExtractElement(const User &I); + void visitInsertElement(const User &I); + void visitShuffleVector(const User &I); + + void visitExtractValue(const ExtractValueInst &I); + void visitInsertValue(const InsertValueInst &I); + + void visitGetElementPtr(const User &I); + void visitSelect(const User &I); + + void visitAlloca(const AllocaInst &I); + void visitLoad(const LoadInst &I); + void visitStore(const StoreInst 
&I);
+  void visitPHI(const PHINode &I);
+  void visitCall(const CallInst &I);
+  bool visitMemCmpCall(const CallInst &I);
+
+  void visitInlineAsm(ImmutableCallSite CS);
+  const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+  void visitPow(const CallInst &I);
+  void visitExp2(const CallInst &I);
+  void visitExp(const CallInst &I);
+  void visitLog(const CallInst &I);
+  void visitLog2(const CallInst &I);
+  void visitLog10(const CallInst &I);
+
+  void visitVAStart(const CallInst &I);
+  void visitVAArg(const VAArgInst &I);
+  void visitVAEnd(const CallInst &I);
+  void visitVACopy(const CallInst &I);
+
+  void visitUserOp1(const Instruction &I) {
+    llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+  }
+  void visitUserOp2(const Instruction &I) {
+    llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+  }
+
+  const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
+
+  void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+  /// EmitFuncArgumentDbgValue - If V is a function argument then create a
+  /// corresponding DBG_VALUE machine instruction for it now.  At the end of
+  /// instruction selection, they will be inserted into the entry BB.
+  bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+                                int64_t Offset, const SDValue &N);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..66cb5ce
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,2553 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuilder.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); + +static cl::opt<bool> +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt<bool> +EnableFastISelAbort("fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction fails")); + +#ifndef NDEBUG +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + 
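+// (With assertions enabled, e.g. "llc -view-isel-dags foo.ll" pops up a
+// GraphViz rendering of each block's DAG just before instruction selection;
+// in NDEBUG builds the flags above are hardwired to false.)
+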
+//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler> > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering &TLI = IS->getTargetLowering(); + + if (OptLevel == CodeGenOpt::None) + return createSourceListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::Latency) + return createTDListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::RegPressure) + return createBURRListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::Hybrid) + return createHybridListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == Sched::ILP && + "Unknown sched type!"); + return createILPListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and this method is called to expand it into a sequence of +// instructions, potentially also creating new basic blocks and control flow. +// When new basic blocks are inserted and the edges from MBB to its successors +// are modified, the method should insert pairs of <OldSucc, NewSucc> into the +// DenseMap. 
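+// A typical use is a pseudo-instruction that must expand to new control flow,
+// e.g. a select pseudo lowered to a branch diamond on a target without
+// conditional moves, or an atomic pseudo expanded to a compare-and-swap loop.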
+MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { +#ifndef NDEBUG + dbgs() << "If a target marks an instruction with " + "'usesCustomInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(0); + return 0; +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : + MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), + FuncInfo(new FunctionLoweringInfo(TLI)), + CurDAG(new SelectionDAG(tm)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) +{} + +SelectionDAGISel::~SelectionDAGISel() { + delete SDB; + delete CurDAG; + delete FuncInfo; +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<GCModuleInfo>(); + AU.addPreserved<GCModuleInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or +/// other function that gcc recognizes as "returning twice". This is used to +/// limit code-gen optimizations on the machine function. +/// +/// FIXME: Remove after <rdar://problem/8031714> is fixed. +static bool FunctionCallsSetJmp(const Function *F) { + const Module *M = F->getParent(); + static const char *ReturnsTwiceFns[] = { + "setjmp", + "sigsetjmp", + "setjmp_syscall", + "savectx", + "qsetjmp", + "vfork", + "getcontext" + }; +#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *) + + for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I) + if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) { + if (!Callee->use_empty()) + for (Value::const_use_iterator + I = Callee->use_begin(), E = Callee->use_end(); + I != E; ++I) + if (const CallInst *CI = dyn_cast<CallInst>(*I)) + if (CI->getParent()->getParent() == F) + return true; + } + + return false; +#undef NUM_RETURNS_TWICE_FNS +} + +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + // Do some sanity-checking on the command-line options. + assert((!EnableFastISelVerbose || EnableFastISel) && + "-fast-isel-verbose requires -fast-isel"); + assert((!EnableFastISelAbort || EnableFastISel) && + "-fast-isel-abort requires -fast-isel"); + + const Function &Fn = *mf.getFunction(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + MF = &mf; + RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + + CurDAG->init(*MF); + FuncInfo->set(Fn, *MF); + SDB->init(GFI, *AA); + + SelectAllBasicBlocks(Fn); + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. 
+ MachineBasicBlock *EntryMBB = MF->begin(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + + DenseMap<unsigned, unsigned> LiveInMap; + if (!FuncInfo->ArgDbgValues.empty()) + for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), + E = RegInfo->livein_end(); LI != E; ++LI) + if (LI->second) + LiveInMap.insert(std::make_pair(LI->first, LI->second)); + + // Insert DBG_VALUE instructions for function arguments to the entry block. + for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { + MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + EntryMBB->insert(EntryMBB->begin(), MI); + else { + MachineInstr *Def = RegInfo->getVRegDef(Reg); + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } + + // If Reg is live-in then update debug info to track its copy in a vreg. + DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); + if (LDI != LiveInMap.end()) { + MachineInstr *Def = RegInfo->getVRegDef(LDI->second); + MachineBasicBlock::iterator InsertPos = Def; + const MDNode *Variable = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + unsigned Offset = MI->getOperand(1).getImm(); + // Def is never a terminator here, so it is ok to increment InsertPos. + BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(LDI->second, RegState::Debug) + .addImm(Offset).addMetadata(Variable); + } + } + + // Determine if there are any calls in this machine function. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (!MFI->hasCalls()) { + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + const MachineBasicBlock *MBB = I; + for (MachineBasicBlock::const_iterator + II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); + + // Operand 1 of an inline asm instruction indicates whether the asm + // needs stack or not. + if ((II->isInlineAsm() && II->getOperand(1).getImm()) || + (TID.isCall() && !TID.isReturn())) { + MFI->setHasCalls(true); + goto done; + } + } + } + done:; + } + + // Determine if there is a call to setjmp in the machine function. + MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + + // Replace forward-declared registers with the registers containing + // the desired value. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap<unsigned, unsigned>::iterator + I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + for (;;) { + DenseMap<unsigned, unsigned>::iterator J = + FuncInfo->RegFixups.find(To); + if (J == E) break; + To = J->second; + } + // Replace it. + MRI.replaceRegWith(From, To); + } + + // Release function-specific state. SDB and CurDAG are already cleared + // at this point. + FuncInfo->clear(); + + return true; +} + +void +SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { + // Lower all of the non-terminator instructions. If a call is emitted + // as a tail call, cease emitting nodes for this block. Terminators + // are handled below. 
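+  // Note that [Begin, End) is a half-open range: when the FastISel path
+  // hands us a single unhandled call, Begin is the call itself and End is
+  // the instruction just past it, so exactly one instruction is lowered.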
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) + SDB->visit(*I); + + // Make sure the root of the DAG is up-to-date. + CurDAG->setRoot(SDB->getControlRoot()); + HadTailCall = SDB->HasTailCall; + SDB->clear(); + + // Final step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallVector<SDNode*, 128> Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + do { + SDNode *N = Worklist.pop_back_val(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N)) + continue; + + // Otherwise, add all chain operands to the worklist. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + Worklist.push_back(N->getOperand(i).getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + EVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); + CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + + // Only install this information if it tells us something. + if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { + DestReg -= TargetRegisterInfo::FirstVirtualRegister; + if (DestReg >= FuncInfo->LiveOutRegInfo.size()) + FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo->LiveOutRegInfo[DestReg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + } while (!Worklist.empty()); +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) + BlockName = MF->getFunction()->getNameStr() + ":" + + FuncInfo->MBB->getBasicBlock()->getNameStr(); + + DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); + + if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); + Changed = CurDAG->LegalizeTypes(); + } + + DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); + + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. 
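+    // Type legalization replaces illegal-typed operations with sequences of
+    // legal ones (for instance, an i64 add becomes an i32 addc/adde pair on
+    // a 32-bit target), which tends to expose combines that were not visible
+    // before, hence this extra combiner run.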
+ { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; + CurDAG->dump()); + } + + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); + CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; + CurDAG->dump()); + } + + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); + CurDAG->Legalize(OptLevel); + } + + DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); + + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); + DoInstructionSelection(); + } + + DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); + + if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. + ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + } + + if (ViewSUnitDAGs) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + FuncInfo->MBB = Scheduler->EmitSchedule(); + FuncInfo->InsertPt = Scheduler->InsertPos; + } + + // Free the scheduler state. + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); + delete Scheduler; + } + + // Free the SelectionDAG state, now that we're finished with it. + CurDAG->clear(); +} + +void SelectionDAGISel::DoInstructionSelection() { + DEBUG(errs() << "===== Instruction selection begins:\n"); + + PreprocessISelDAG(); + + // Select target instructions for the DAG. + { + // Number all nodes with a topological order and set DAGSize. + DAGSize = CurDAG->AssignTopologicalOrder(); + + // Create a dummy node (which is not added to allnodes), that adds + // a reference to the root node, preventing it from being deleted, + // and tracking any changes of the root. + HandleSDNode Dummy(CurDAG->getRoot()); + ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); + ++ISelPosition; + + // The AllNodes list is now topological-sorted. 
Visit the
+    // nodes by starting at the end of the list (the root of the
+    // graph) and proceeding back toward the beginning (the entry
+    // node).
+    while (ISelPosition != CurDAG->allnodes_begin()) {
+      SDNode *Node = --ISelPosition;
+      // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+      // but there are currently some corner cases that it misses. Also, this
+      // makes it theoretically possible to disable the DAGCombiner.
+      if (Node->use_empty())
+        continue;
+
+      SDNode *ResNode = Select(Node);
+
+      // FIXME: This is pretty gross.  'Select' should be changed to not return
+      // anything at all and this code should be nuked with a tactical strike.
+
+      // If the node should not be replaced, continue with the next one.
+      if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+        continue;
+      // Replace node.
+      if (ResNode)
+        ReplaceUses(Node, ResNode);
+
+      // If after the replacement this node is not used any more,
+      // remove this dead node.
+      if (Node->use_empty()) { // Don't delete EntryToken, etc.
+        ISelUpdater ISU(ISelPosition);
+        CurDAG->RemoveDeadNode(Node, &ISU);
+      }
+    }
+
+    CurDAG->setRoot(Dummy.getValue());
+  }
+
+  DEBUG(errs() << "===== Instruction selection ends:\n");
+
+  PostprocessISelDAG();
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+  // Add a label to mark the beginning of the landing pad.  Deletion of the
+  // landing pad can thus be detected via the MachineModuleInfo.
+  MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
+
+  const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+  BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+    .addSym(Label);
+
+  // Mark the exception register as live in.
+  unsigned Reg = TLI.getExceptionAddressRegister();
+  if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+  // Mark the exception selector register as live in.
+  Reg = TLI.getExceptionSelectorRegister();
+  if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+  // FIXME: Hack around an exception handling flaw (PR1508): the personality
+  // function and list of typeids logically belong to the invoke (or, if you
+  // like, the basic block containing the invoke), and need to be associated
+  // with it in the dwarf exception handling tables.  Currently however the
+  // information is provided by an intrinsic (eh.selector) that can be moved
+  // to unexpected places by the optimizers: if the unwind edge is critical,
+  // then breaking it can result in the intrinsics being in the successor of
+  // the landing pad, not the landing pad itself.  This results
+  // in exceptions not being caught because no typeids are associated with
+  // the invoke.  This may not be the only way things can go wrong, but it
+  // is the only one we try to work around for the moment.
+  const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
+  const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+  if (Br && Br->isUnconditional()) { // Critical edge?
+    BasicBlock::const_iterator I, E;
+    for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+      if (isa<EHSelectorInst>(I))
+        break;
+
+    if (I == E)
+      // No catch info found - try to extract some from the successor.
+      CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+  }
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+  // Initialize the Fast-ISel state, if needed.
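+  // FastISel is only created when fast instruction selection was requested
+  // on the command line (the EnableFastISel flag checked just below);
+  // otherwise FastIS stays null and every block takes the SelectionDAG path.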
+ FastISel *FastIS = 0; + if (EnableFastISel) + FastIS = TLI.createFastISel(*FuncInfo); + + // Iterate over all basic blocks in the function. + for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + const BasicBlock *LLVMBB = &*I; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = End; + + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. + if (FuncInfo->MBB->isLandingPad()) + PrepareEHLandingPad(); + + // Lower any arguments needed in this block if this is the entry block. + if (LLVMBB == &Fn.getEntryBlock()) + LowerArguments(LLVMBB); + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS) { + FastIS->startNewBlock(); + + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(0); + } + + // Do FastISel on as many instructions as possible. + for (; BI != Begin; --BI) { + const Instruction *Inst = llvm::prior(BI); + + // If we no longer require this instruction, skip it. + if (!Inst->mayWriteToMemory() && + !isa<TerminatorInst>(Inst) && + !isa<DbgInfoIntrinsic>(Inst) && + !FuncInfo->isExportedInst(Inst)) + continue; + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + + // Try to select the instruction with FastISel. + if (FastIS->SelectInstruction(Inst)) + continue; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa<CallInst>(Inst)) { + ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed call: "; + Inst->dump(); + } + + if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; + if (!R) + R = FuncInfo->CreateRegs(Inst->getType()); + } + + bool HadTailCall = false; + SelectBasicBlock(Inst, BI, HadTailCall); + + // If the call was emitted as a tail call, we're done with the block. + if (HadTailCall) { + --BI; + break; + } + + continue; + } + + // Otherwise, give up on FastISel for the rest of the block. + // For now, be a little lenient about non-branch terminators. + if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) { + ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel miss: "; + Inst->dump(); + } + if (EnableFastISelAbort) + // The "fast" selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + llvm_unreachable("FastISel didn't select the entire block"); + } + break; + } + + FastIS->recomputeInsertPt(); + } + + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. 
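+    // FastISel works bottom-up, from the terminator toward the start of the
+    // block, so when it bails on an instruction it cannot select, BI is left
+    // pointing just past that instruction and [Begin, BI) is the prefix that
+    // still needs the DAG selector.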
+    bool HadTailCall;
+    SelectBasicBlock(Begin, BI, HadTailCall);
+
+    FinishBasicBlock();
+    FuncInfo->PHINodesToUpdate.clear();
+  }
+
+  delete FastIS;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+  DEBUG(dbgs() << "Total number of PHI nodes to update: "
+               << FuncInfo->PHINodesToUpdate.size() << "\n";
+        for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+          dbgs() << "Node " << i << " : ("
+                 << FuncInfo->PHINodesToUpdate[i].first
+                 << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+  // Now that we know which MBB is the last one the LLVM BB expanded into,
+  // update the PHI nodes in its successors.
+  if (SDB->SwitchCases.empty() &&
+      SDB->JTCases.empty() &&
+      SDB->BitTestCases.empty()) {
+    for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+        continue;
+      PHI->addOperand(
+        MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+      PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+    }
+    return;
+  }
+
+  for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+    // Lower the header first, if it wasn't already lowered.
+    if (!SDB->BitTestCases[i].Emitted) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // Emit the code
+      SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+      CurDAG->setRoot(SDB->getRoot());
+      SDB->clear();
+      CodeGenAndEmitDAG();
+    }
+
+    for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // Emit the code
+      if (j+1 != ej)
+        SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j],
+                              FuncInfo->MBB);
+      else
+        SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j],
+                              FuncInfo->MBB);
+
+      CurDAG->setRoot(SDB->getRoot());
+      SDB->clear();
+      CodeGenAndEmitDAG();
+    }
+
+    // Update PHI nodes.
+    for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+         pi != pe; ++pi) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      // This is the "default" BB.  We have two jumps to it: from the "header"
+      // BB and from the last "case" BB.
+      if (PHIBB == SDB->BitTestCases[i].Default) {
+        PHI->addOperand(MachineOperand::
+                        CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                  false));
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+        PHI->addOperand(MachineOperand::
+                        CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                  false));
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
+                                                  back().ThisBB));
+      }
+      // One of the "cases" BBs.
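+      // Add one incoming value for every case block that actually branches
+      // to PHIBB.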
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; + if (cBB->isSuccessor(PHIBB)) { + PHI->addOperand(MachineOperand:: + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(cBB)); + } + } + } + } + SDB->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDB->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, + FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTable(SDB->JTCases[i].second); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDB->JTCases[i].second.Default) { + PHI->addOperand + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand + (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); + } + // JT BB. Just iterate over successors here + if (FuncInfo->MBB->isSuccessor(PHIBB)) { + PHI->addOperand + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); + } + } + } + SDB->JTCases.clear(); + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); + } + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + + // Determine the unique successors. + SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SwitchCases[i].TrueBB); + if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SwitchCases[i].FalseBB); + + // Emit the code. 
Note that this could result in ThisBB being split, so + // we need to check for updates. + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + ThisBB = FuncInfo->MBB; + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. We have to be very careful to + // handle them the right number of times. + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); + Phi != FuncInfo->MBB->end() && Phi->isPHI(); + ++Phi) { + // This value for this PHI node is recorded in PHINodesToUpdate. + for (unsigned pn = 0; ; ++pn) { + assert(pn != FuncInfo->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (FuncInfo->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand:: + CreateReg(FuncInfo->PHINodesToUpdate[pn].second, + false)); + Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); + break; + } + } + } + } + } + } + SDB->SwitchCases.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + return Ctor(this, OptLevel); +} + +ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { + return new ScheduleHazardRecognizer(); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. 
RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+                                   int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask sets bits that the desired mask does not allow,
+  // this doesn't match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+
+  APInt KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+  // If all the missing bits in the or are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+  std::vector<SDValue> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[InlineAsm::Op_InputChain]);   // 0
+  Ops.push_back(InOps[InlineAsm::Op_AsmString]);    // 1
+  Ops.push_back(InOps[InlineAsm::Op_MDNode]);       // 2, !srcloc
+  Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
+
+  unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Flag)
+    --e;  // Don't process a flag operand if it is here.
+
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+    if (!InlineAsm::isMemKind(Flags)) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i,
+                 InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+      i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+    } else {
+      assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+             "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDValue> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+        report_fatal_error("Could not match memory address.  Inline asm"
+                           " failure!");
+
+      // Add this to the output node.
+      unsigned NewFlags =
+        InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+      Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+
+  // Add the flag input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+/// findFlagUse - Return the use of the MVT::Flag value produced by the
+/// specified SDNode.
+///
+static SDNode *findFlagUse(SDNode *N) {
+  unsigned FlagResNo = N->getNumValues()-1;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDUse &Use = I.getUse();
+    if (Use.getResNo() == FlagResNo)
+      return Use.getUser();
+  }
+  return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
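+/// ("Immediate use" means a use by ImmedUse itself: that edge is the fold
+/// being attempted, so only other paths from Root to Def count as
+/// conflicts.)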
+static bool findNonImmUse(SDNode *Use, SDNode *Def, SDNode *ImmedUse,
+                          SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+                          bool IgnoreChains) {
+  // Node IDs are assigned in topological order: a node's ID is guaranteed to
+  // be greater than the IDs of all of its (recursive) operands.  If we scan
+  // to a point where 'Use' is smaller than the node we're scanning for, then
+  // we know we will never find it.
+  //
+  // The Use's ID may be -1 (unassigned) if it is a newly allocated node.  This
+  // can happen because we scan down to newly selected nodes in the case of
+  // flag uses.
+  if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+    return false;
+
+  // Don't revisit a node if we already scanned it and didn't fail; we know we
+  // won't fail if we scan it again.
+  if (!Visited.insert(Use))
+    return false;
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    // Ignore chain uses, they are validated by HandleMergeInputChains.
+    if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+      continue;
+
+    SDNode *N = Use->getOperand(i).getNode();
+    if (N == Def) {
+      if (Use == ImmedUse || Use == Root)
+        continue;  // We are not looking for immediate use.
+      assert(N != Root);
+      return true;
+    }
+
+    // Traverse up the operand chain.
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+      return true;
+  }
+  return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+                                          SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+  return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool IgnoreChains) {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If Root can somehow reach N through a path that doesn't contain U, then
+  // folding N would create a cycle.  e.g., in the following diagram, Root can
+  // reach N through X.  If N is folded into Root, then X is both a
+  // predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces a flag, then it gets (even more) interesting.  Since it
+  // will be "glued" together with its flag use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \      |       //
+  //         [Root*] |       //
+  //          ^      |       //
+  //          f      |       //
+  //          |      /       //
+  //         [Y]    /        //
+  //           ^   /         //
+  //           f  /          //
+  //           | /           //
+  //          [FU]           //
+  //
+  // If FU (flag use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of FU and a successor of
+  // Fold.  But since Fold and FU are flagged together, this will create
+  // a cycle in the scheduling graph.
+
+  // If the node has flags, walk down the graph to the "lowest" node in the
+  // flagged set.
+  EVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Flag) {
+    SDNode *FU = findFlagUse(Root);
+    if (FU == NULL)
+      break;
+    Root = FU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+
+    // If our query node has a flag result with a use, we've walked up it.
If + // the user (which has already been selected) has a chain or indirectly uses + // the chain, our WalkChainUsers predicate will not consider it. Because of + // this, we cannot ignore chains in this predicate. + IgnoreChains = false; + } + + + SmallPtrSet<SDNode*, 16> Visited; + return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); +} + +SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); + SelectInlineAsmMemoryOperands(Ops); + + std::vector<EVT> VTs; + VTs.push_back(MVT::Other); + VTs.push_back(MVT::Flag); + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + VTs, &Ops[0], Ops.size()); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); +} + +/// GetVBR - decode a vbr encoding whose top bit is set. +ALWAYS_INLINE static uint64_t +GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { + assert(Val >= 128 && "Not a VBR"); + Val &= 127; // Remove first vbr bit. + + unsigned Shift = 7; + uint64_t NextBits; + do { + NextBits = MatcherTable[Idx++]; + Val |= (NextBits&127) << Shift; + Shift += 7; + } while (NextBits & 128); + + return Val; +} + + +/// UpdateChainsAndFlags - When a match is complete, this method updates uses of +/// interior flag and chain results to use the new flag and chain results. +void SelectionDAGISel:: +UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputFlag, + const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, + bool isMorphNodeTo) { + SmallVector<SDNode*, 4> NowDeadNodes; + + ISelUpdater ISU(ISelPosition); + + // Now that all the normal results are replaced, we replace the chain and + // flag results if present. + if (!ChainNodesMatched.empty()) { + assert(InputChain.getNode() != 0 && + "Matched input chains but didn't produce a chain"); + // Loop over all of the nodes we matched that produced a chain result. + // Replace all the chain results with the final chain we ended up with. + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + SDNode *ChainNode = ChainNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (ChainNode->getOpcode() == ISD::DELETED_NODE) + continue; + + // Don't replace the results of the root node if we're doing a + // MorphNodeTo. + if (ChainNode == NodeToMatch && isMorphNodeTo) + continue; + + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); + if (ChainVal.getValueType() == MVT::Flag) + ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); + assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); + + // If the node became dead and we haven't already seen it, delete it. + if (ChainNode->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) + NowDeadNodes.push_back(ChainNode); + } + } + + // If the result produces a flag, update any flag results in the matched + // pattern with the flag result. + if (InputFlag.getNode() != 0) { + // Handle any interior nodes explicitly marked. + for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = FlagResultNodesMatched[i]; + + // If this node was already deleted, don't look at it. 
+      if (FRN->getOpcode() == ISD::DELETED_NODE)
+        continue;
+
+      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
+             "Doesn't have a flag result");
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+                                        InputFlag, &ISU);
+
+      // If the node became dead and we haven't already seen it, delete it.
+      if (FRN->use_empty() &&
+          !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+        NowDeadNodes.push_back(FRN);
+    }
+  }
+
+  if (!NowDeadNodes.empty())
+    CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+
+  DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+  CR_Simple,
+  CR_InducesCycle,
+  CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding
+/// because it would induce a cycle), and whether we have a TokenFactor node
+/// sandwiched between pattern nodes (in which case the TF becomes part of the
+/// pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down
+/// to already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(SDNode *ChainedNode,
+               SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+               SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+  ChainResult Result = CR_Simple;
+
+  for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+       E = ChainedNode->use_end(); UI != E; ++UI) {
+    // Make sure the use is of the chain, not some other value we produce.
+    if (UI.getUse().getValueType() != MVT::Other) continue;
+
+    SDNode *User = *UI;
+
+    // If we see an already-selected machine node, then we've gone beyond the
+    // pattern that we're selecting down into the already selected chunk of the
+    // DAG.
+    if (User->isMachineOpcode() ||
+        User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
+      continue;
+
+    if (User->getOpcode() == ISD::CopyToReg ||
+        User->getOpcode() == ISD::CopyFromReg ||
+        User->getOpcode() == ISD::INLINEASM ||
+        User->getOpcode() == ISD::EH_LABEL) {
+      // If their node ID got reset to -1 then they've already been selected.
+      // Treat them like a MachineOpcode.
+      if (User->getNodeId() == -1)
+        continue;
+    }
+
+    // If we have a TokenFactor, we handle it specially.
+    if (User->getOpcode() != ISD::TokenFactor) {
+      // If the node isn't a token factor and isn't part of our pattern, then
+      // it must be a random chained node in between two nodes we're selecting.
+      // This happens when we have something like:
+      //   x = load ptr
+      //   call
+      //   y = x+4
+      //   store y -> ptr
+      // Because we structurally match the load/store as a read/modify/write
+      // pair while the call is chained between them, we cannot fold in this
+      // case: doing so would induce a cycle in the graph.
+      if (!std::count(ChainedNodesInPattern.begin(),
+                      ChainedNodesInPattern.end(), User))
+        return CR_InducesCycle;
+
+      // Otherwise we found a node that is part of our pattern.  For example in:
+      //   x = load ptr
+      //   y = x+4
+      //   store y -> ptr
+      // This would happen when we're scanning down from the load and see the
+      // store as a user.  Record that there is a use of ChainedNode that is
+      // part of the pattern and keep scanning uses.
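+      // ("Interior" means the node's chain result is consumed inside the
+      // pattern itself, so its incoming chain is produced by another matched
+      // node and must not feed the merged input TokenFactor.)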
+      Result = CR_LeadsToInteriorNode;
+      InteriorChainedNodes.push_back(User);
+      continue;
+    }
+
+    // If we found a TokenFactor, there are two cases to consider: first if the
+    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+    // uses of the TF are in our pattern) we just want to ignore it.  Second,
+    // the TokenFactor can be sandwiched in between two chained nodes, like so:
+    //     [Load chain]
+    //         ^
+    //         |
+    //       [Load]
+    //       ^    ^
+    //       |    \                    DAG's like cheese
+    //      /       \                       do you?
+    //     /          |
+    // [TokenFactor] [Op]
+    //     ^          ^
+    //     |          |
+    //      \        /
+    //       \      /
+    //       [Store]
+    //
+    // In this case, the TokenFactor becomes part of our match and we rewrite
+    // it as a new TokenFactor.
+    //
+    // To distinguish these two cases, do a recursive walk down the uses.
+    switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+    case CR_Simple:
+      // If the uses of the TokenFactor are just already-selected nodes, ignore
+      // it, it is "below" our pattern.
+      continue;
+    case CR_InducesCycle:
+      // If the uses of the TokenFactor lead to nodes that are not part of our
+      // pattern that are not selected, folding would turn this into a cycle,
+      // bail out now.
+      return CR_InducesCycle;
+    case CR_LeadsToInteriorNode:
+      break;  // Otherwise, keep processing.
+    }
+
+    // Okay, we know we're in the interesting interior case.  The TokenFactor
+    // is now going to be considered part of the pattern so that we rewrite its
+    // uses (it may have uses that are not part of the pattern) with the
+    // ultimate chain result of the generated code.  We will also add its chain
+    // inputs as inputs to the ultimate TokenFactor we create.
+    Result = CR_LeadsToInteriorNode;
+    ChainedNodesInPattern.push_back(User);
+    InteriorChainedNodes.push_back(User);
+    continue;
+  }
+
+  return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain.  The
+/// input vector contains a list of all of the chained nodes that we match.  We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and, if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                       SelectionDAG *CurDAG) {
+  // Walk all of the chained nodes we've matched, recursively scanning down the
+  // users of the chain result.  This adds any TokenFactor nodes that are
+  // caught in between chained nodes to the chained and interior nodes list.
+  SmallVector<SDNode*, 3> InteriorChainedNodes;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+                       InteriorChainedNodes) == CR_InducesCycle)
+      return SDValue(); // Would induce a cycle.
+  }
+
+  // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+  // that we are interested in.  Form our input TokenFactor node.
+  SmallVector<SDValue, 3> InputChains;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    // Add the input chain of this node to the InputChains list (which will be
+    // the operands of the generated TokenFactor) if it's not an interior node.
+    SDNode *N = ChainNodesMatched[i];
+    if (N->getOpcode() != ISD::TokenFactor) {
+      if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+        continue;
+
+      // Otherwise, add the input chain.
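+      // (By convention a chained node's input chain is its operand 0; since
+      // this node is not interior, that chain comes from outside the pattern
+      // and must feed the merged TokenFactor.)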
+      SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+      assert(InChain.getValueType() == MVT::Other && "Not a chain");
+      InputChains.push_back(InChain);
+      continue;
+    }
+
+    // If we have a token factor, we want to add all inputs of the token factor
+    // that are not part of the pattern we're matching.
+    for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+      if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+                      N->getOperand(op).getNode()))
+        InputChains.push_back(N->getOperand(op));
+    }
+  }
+
+  if (InputChains.size() == 1)
+    return InputChains[0];
+  return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+                         MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+          const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+  // It is possible we're using MorphNodeTo to replace a node with no
+  // normal results with one that has a normal result (or we could be
+  // adding a chain) and the input could have flags and chains as well.
+  // In this case we need to shift the operands down.
+  // FIXME: This is a horrible hack and broken in obscure cases, no worse
+  // than the old isel though.
+  int OldFlagResultNo = -1, OldChainResultNo = -1;
+
+  unsigned NTMNumResults = Node->getNumValues();
+  if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
+    OldFlagResultNo = NTMNumResults-1;
+    if (NTMNumResults != 1 &&
+        Node->getValueType(NTMNumResults-2) == MVT::Other)
+      OldChainResultNo = NTMNumResults-2;
+  } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+    OldChainResultNo = NTMNumResults-1;
+
+  // Call the underlying SelectionDAG routine to do the transmogrification.
+  // Note that this deletes operands of the old node that become dead.
+  SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+  // MorphNodeTo can operate in two ways: if an existing node with the
+  // specified operands exists, it can just return it.  Otherwise, it
+  // updates the node in place to have the requested operands.
+  if (Res == Node) {
+    // If we updated the node in place, reset the node ID.  To the isel,
+    // this should be just like a newly allocated machine node.
+    Res->setNodeId(-1);
+  }
+
+  unsigned ResNumResults = Res->getNumValues();
+  // Move the flag if needed.
+  if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
+      (unsigned)OldFlagResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+                                      SDValue(Res, ResNumResults-1));
+
+  if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+    --ResNumResults;
+
+  // Move the chain reference if needed.
+  if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+      (unsigned)OldChainResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+                                      SDValue(Res, ResNumResults-1));
+
+  // Otherwise, no replacement happened because the node already exists.
+  // Replace uses of the old node with the new one.
+  if (Res != Node)
+    CurDAG->ReplaceAllUsesWith(Node, Res);
+
+  return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+          SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+  // Accept if it is exactly the same as a previously recorded node.
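+  // CheckSame is emitted for patterns that reference the same operand more
+  // than once, e.g. (add x, x): the first occurrence is recorded and the
+  // second is compared against the recorded node here.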
+ unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + return N == RecordedNodes[RecNo]; +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +ALWAYS_INLINE static bool +CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SelectionDAGISel &SDISel) { + return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); +} + +/// CheckNodePredicate - Implements OP_CheckNodePredicate. +ALWAYS_INLINE static bool +CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SelectionDAGISel &SDISel, SDNode *N) { + return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); +} + +ALWAYS_INLINE static bool +CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDNode *N) { + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + return N->getOpcode() == Opc; +} + +ALWAYS_INLINE static bool +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (N.getValueType() == VT) return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); +} + +ALWAYS_INLINE static bool +CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); +} + + +ALWAYS_INLINE static bool +CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + return cast<CondCodeSDNode>(N)->get() == + (ISD::CondCode)MatcherTable[MatcherIndex++]; +} + +ALWAYS_INLINE static bool +CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (cast<VTSDNode>(N)->getVT() == VT) + return true; + + // Handle the case when VT is iPTR. 
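+  // (iPTR is the .td placeholder for "the target's pointer type"; its
+  // concrete MVT is only known here, via TLI.getPointerTy().)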
+  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+             SDValue N) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+  return C != 0 && C->getSExtValue() == Val;
+}
+
+ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+            SDValue N, SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  if (N->getOpcode() != ISD::AND) return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+           SDValue N, SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  if (N->getOpcode() != ISD::OR) return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node.  If the current predicate is known to
+/// fail, set Result=true (the caller ignores the return value in that case).
+/// If the current predicate is known to pass, or cannot be evaluated here,
+/// set Result=false and return the MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+                                       unsigned Index, SDValue N,
+                                       bool &Result, SelectionDAGISel &SDISel,
+                                       SmallVectorImpl<SDValue> &RecordedNodes){
+  switch (Table[Index++]) {
+  default:
+    Result = false;
+    return Index-1;  // Could not evaluate this predicate.
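+  // Each case below evaluates one predicate speculatively.  Note the
+  // inverted sense: Result is set to true exactly when the check fails,
+  // i.e. when this scope alternative is known to be dead.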
+ case SelectionDAGISel::OPC_CheckSame: + Result = !::CheckSame(Table, Index, N, RecordedNodes); + return Index; + case SelectionDAGISel::OPC_CheckPatternPredicate: + Result = !::CheckPatternPredicate(Table, Index, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckPredicate: + Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckOpcode: + Result = !::CheckOpcode(Table, Index, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckType: + Result = !::CheckType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckChild0Type: + case SelectionDAGISel::OPC_CheckChild1Type: + case SelectionDAGISel::OPC_CheckChild2Type: + case SelectionDAGISel::OPC_CheckChild3Type: + case SelectionDAGISel::OPC_CheckChild4Type: + case SelectionDAGISel::OPC_CheckChild5Type: + case SelectionDAGISel::OPC_CheckChild6Type: + case SelectionDAGISel::OPC_CheckChild7Type: + Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); + return Index; + case SelectionDAGISel::OPC_CheckCondCode: + Result = !::CheckCondCode(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckValueType: + Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckInteger: + Result = !::CheckInteger(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckAndImm: + Result = !::CheckAndImm(Table, Index, N, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckOrImm: + Result = !::CheckOrImm(Table, Index, N, SDISel); + return Index; + } +} + +namespace { + +struct MatchScope { + /// FailIndex - If this match fails, this is the index to continue with. + unsigned FailIndex; + + /// NodeStack - The node stack when the scope was formed. + SmallVector<SDValue, 4> NodeStack; + + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. + unsigned NumRecordedNodes; + + /// NumMatchedMemRefs - The number of matched memref entries. + unsigned NumMatchedMemRefs; + + /// InputChain/InputFlag - The current chain/flag + SDValue InputChain, InputFlag; + + /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. + bool HasChainNodesMatched, HasFlagResultNodesMatched; +}; + +} + +SDNode *SelectionDAGISel:: +SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, + unsigned TableSize) { + // FIXME: Should these even be selected? Handle these cases in the caller? + switch (NodeToMatch->getOpcode()) { + default: + break; + case ISD::EntryToken: // These nodes remain the same. + case ISD::BasicBlock: + case ISD::Register: + //case ISD::VALUETYPE: + //case ISD::CONDCODE: + case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: + case ISD::TargetConstant: + case ISD::TargetConstantFP: + case ISD::TargetConstantPool: + case ISD::TargetFrameIndex: + case ISD::TargetExternalSymbol: + case ISD::TargetBlockAddress: + case ISD::TargetJumpTable: + case ISD::TargetGlobalTLSAddress: + case ISD::TargetGlobalAddress: + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::EH_LABEL: + NodeToMatch->setNodeId(-1); // Mark selected. 
+ return 0; + case ISD::AssertSext: + case ISD::AssertZext: + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), + NodeToMatch->getOperand(0)); + return 0; + case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + } + + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); + + // Set up the node stack with NodeToMatch as the only node on the stack. + SmallVector<SDValue, 8> NodeStack; + SDValue N = SDValue(NodeToMatch, 0); + NodeStack.push_back(N); + + // MatchScopes - Scopes used when matching, if a match failure happens, this + // indicates where to continue checking. + SmallVector<MatchScope, 8> MatchScopes; + + // RecordedNodes - This is the set of nodes that have been recorded by the + // state machine. + SmallVector<SDValue, 8> RecordedNodes; + + // MatchedMemRefs - This is the set of MemRef's we've seen in the input + // pattern. + SmallVector<MachineMemOperand*, 2> MatchedMemRefs; + + // These are the current input chain and flag for use when generating nodes. + // Various Emit operations change these. For example, emitting a copytoreg + // uses and updates these. + SDValue InputChain, InputFlag; + + // ChainNodesMatched - If a pattern matches nodes that have input/output + // chains, the OPC_EmitMergeInputChains operation is emitted which indicates + // which ones they are. The result is captured into this list so that we can + // update the chain results when the pattern is complete. + SmallVector<SDNode*, 3> ChainNodesMatched; + SmallVector<SDNode*, 3> FlagResultNodesMatched; + + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + NodeToMatch->dump(CurDAG); + errs() << '\n'); + + // Determine where to start the interpreter. Normally we start at opcode #0, + // but if the state machine starts with an OPC_SwitchOpcode, then we + // accelerate the first lookup (which is guaranteed to be hot) with the + // OpcodeOffset table. + unsigned MatcherIndex = 0; + + if (!OpcodeOffset.empty()) { + // Already computed the OpcodeOffset table, just index into it. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + + } else if (MatcherTable[0] == OPC_SwitchOpcode) { + // Otherwise, the table isn't computed, but the state machine does start + // with an OPC_SwitchOpcode instruction. Populate the table now, since this + // is the first time we're selecting an instruction. + unsigned Idx = 1; + while (1) { + // Get the size of this case. + unsigned CaseSize = MatcherTable[Idx++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, Idx); + if (CaseSize == 0) break; + + // Get the opcode, add the index to the table. + uint16_t Opc = MatcherTable[Idx++]; + Opc |= (unsigned short)MatcherTable[Idx++] << 8; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (1) { + assert(MatcherIndex < TableSize && "Invalid index"); +#ifndef NDEBUG + unsigned CurrentOpcodeIndex = MatcherIndex; +#endif + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. 
However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (1) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + unsigned MatcherIndexOfPredicate = MatcherIndex; + (void)MatcherIndexOfPredicate; // silence warning. + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. + bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate + << ", continuing at " << FailIndex << "\n"); + ++NumDAGIselRetries; + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputFlag = InputFlag; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: + // Remember this node, it may end up being an operand in the pattern. + RecordedNodes.push_back(N); + continue; + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + + RecordedNodes.push_back(N->getOperand(ChildNo)); + continue; + } + case OPC_RecordMemRef: + MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); + continue; + + case OPC_CaptureFlagInput: + // If the current node has an input flag, capture it in InputFlag. + if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) + InputFlag = N->getOperand(N->getNumOperands()-1); + continue; + + case OPC_MoveChild: { + unsigned ChildNo = MatcherTable[MatcherIndex++]; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveParent: + // Pop the current node off the NodeStack. 
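+      // The popped entry was pushed by the matching OPC_MoveChild, so N
+      // returns to the parent the matcher was visiting before descending.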
+ NodeStack.pop_back(); + assert(!NodeStack.empty() && "Node stack imbalance!"); + N = NodeStack.back(); + continue; + + case OPC_CheckSame: + if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; + continue; + case OPC_CheckPatternPredicate: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + continue; + case OPC_CheckPredicate: + if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, + N.getNode())) + break; + continue; + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + RecordedNodes)) + break; + continue; + } + case OPC_CheckOpcode: + if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; + continue; + + case OPC_CheckType: + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + + case OPC_SwitchOpcode: { + unsigned CurNodeOpcode = N.getOpcode(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + + // If the opcode matches, then we will execute this case. + if (CurNodeOpcode == Opc) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. + DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + << " to " << MatcherIndex << "\n"); + continue; + } + + case OPC_SwitchType: { + MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + MVT::SimpleValueType CaseVT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (CaseVT == MVT::iPTR) + CaseVT = TLI.getPointerTy().SimpleTy; + + // If the VT matches, then we will execute this case. + if (CurNodeVT == CaseVT) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. 
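+      // MatcherIndex already points at the first opcode inside the matching
+      // case, so continuing the interpreter loop executes it directly.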
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + continue; + } + case OPC_CheckChild0Type: case OPC_CheckChild1Type: + case OPC_CheckChild2Type: case OPC_CheckChild3Type: + case OPC_CheckChild4Type: case OPC_CheckChild5Type: + case OPC_CheckChild6Type: case OPC_CheckChild7Type: + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + Opcode-OPC_CheckChild0Type)) + break; + continue; + case OPC_CheckCondCode: + if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckValueType: + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + case OPC_CheckInteger: + if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckAndImm: + if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckOrImm: + if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + + case OPC_CheckFoldableChainNode: { + assert(NodeStack.size() != 1 && "No parent node"); + // Verify that all intermediate nodes between the root and this one have + // a single use. + bool HasMultipleUses = false; + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) + if (!NodeStack[i].hasOneUse()) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + + // Check to see that the target thinks this is profitable to fold and that + // we can fold it without inducing cycles in the graph. + if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch) || + !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) + break; + + continue; + } + case OPC_EmitInteger: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); + continue; + } + case OPC_EmitRegister: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); + continue; + } + + case OPC_EmitConvertToTarget: { + // Convert from IMM/FPIMM to target version. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Imm = RecordedNodes[RecNo]; + + if (Imm->getOpcode() == ISD::Constant) { + int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); + Imm = CurDAG->getTargetConstant(Val, Imm.getValueType()); + } else if (Imm->getOpcode() == ISD::ConstantFP) { + const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); + Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); + } + + RecordedNodes.push_back(Imm); + continue; + } + + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 + case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 + // These are space-optimized forms of OPC_EmitMergeInputChains. + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. 
+ unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. + continue; + } + + case OPC_EmitMergeInputChains: { + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + // This node gets a list of nodes we matched in the input that have + // chains. We want to token factor all of the input chains to these nodes + // together. However, if any of the input chains is actually one of the + // nodes matched in this pattern, then we have an intra-match reference. + // Ignore these because the newly token factored chain should not refer to + // the old nodes. + unsigned NumChains = MatcherTable[MatcherIndex++]; + assert(NumChains != 0 && "Can't TF zero chains"); + + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + for (unsigned i = 0; i != NumChains; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + } + + // If the inner loop broke out, the match fails. + if (ChainNodesMatched.empty()) + break; + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. + + continue; + } + + case OPC_EmitCopyToReg: { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + + if (InputChain.getNode() == 0) + InputChain = CurDAG->getEntryNode(); + + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), + DestPhysReg, RecordedNodes[RecNo], + InputFlag); + + InputFlag = InputChain.getValue(1); + continue; + } + + case OPC_EmitNodeXForm: { + unsigned XFormNo = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); + continue; + } + + case OPC_EmitNode: + case OPC_MorphNodeTo: { + uint16_t TargetOpc = MatcherTable[MatcherIndex++]; + TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + // Get the result VT list. 
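+      // The list is encoded as a count byte followed by one
+      // MVT::SimpleValueType per result; iPTR entries are resolved to the
+      // target's pointer type below.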
+      unsigned NumVTs = MatcherTable[MatcherIndex++];
+      SmallVector<EVT, 4> VTs;
+      for (unsigned i = 0; i != NumVTs; ++i) {
+        MVT::SimpleValueType VT =
+          (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+        if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+        VTs.push_back(VT);
+      }
+
+      if (EmitNodeInfo & OPFL_Chain)
+        VTs.push_back(MVT::Other);
+      if (EmitNodeInfo & OPFL_FlagOutput)
+        VTs.push_back(MVT::Flag);
+
+      // This is hot code, so optimize the two most common cases of 1 and 2
+      // results.
+      SDVTList VTList;
+      if (VTs.size() == 1)
+        VTList = CurDAG->getVTList(VTs[0]);
+      else if (VTs.size() == 2)
+        VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+      else
+        VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+      // Get the operand list.
+      unsigned NumOps = MatcherTable[MatcherIndex++];
+      SmallVector<SDValue, 8> Ops;
+      for (unsigned i = 0; i != NumOps; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        if (RecNo & 128)
+          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+        Ops.push_back(RecordedNodes[RecNo]);
+      }
+
+      // If there are variadic operands to add, handle them now.
+      if (EmitNodeInfo & OPFL_VariadicInfo) {
+        // Determine the start index to copy from.
+        unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+        FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+        assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+               "Invalid variadic node");
+        // Copy all of the variadic operands, not including a potential flag
+        // input.
+        for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+             i != e; ++i) {
+          SDValue V = NodeToMatch->getOperand(i);
+          if (V.getValueType() == MVT::Flag) break;
+          Ops.push_back(V);
+        }
+      }
+
+      // If this has chain/flag inputs, add them.
+      if (EmitNodeInfo & OPFL_Chain)
+        Ops.push_back(InputChain);
+      if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
+        Ops.push_back(InputFlag);
+
+      // Create the node.
+      SDNode *Res = 0;
+      if (Opcode != OPC_MorphNodeTo) {
+        // If this is a normal EmitNode command, just create the new node and
+        // add the results to the RecordedNodes list.
+        Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+                                     VTList, Ops.data(), Ops.size());
+
+        // Add all the non-flag/non-chain results to the RecordedNodes list.
+        for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+          if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
+          RecordedNodes.push_back(SDValue(Res, i));
+        }
+
+      } else {
+        Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+                        EmitNodeInfo);
+      }
+
+      // If the node had chain/flag results, update our notion of the current
+      // chain and flag.
+      if (EmitNodeInfo & OPFL_FlagOutput) {
+        InputFlag = SDValue(Res, VTs.size()-1);
+        if (EmitNodeInfo & OPFL_Chain)
+          InputChain = SDValue(Res, VTs.size()-2);
+      } else if (EmitNodeInfo & OPFL_Chain)
+        InputChain = SDValue(Res, VTs.size()-1);
+
+      // If the OPFL_MemRefs flag is set on this node, slap all of the
+      // accumulated memrefs onto it.
+      //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+      if (EmitNodeInfo & OPFL_MemRefs) {
+        MachineSDNode::mmo_iterator MemRefs =
+          MF->allocateMemRefsArray(MatchedMemRefs.size());
+        std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+        cast<MachineSDNode>(Res)
+          ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+      }
+
+      DEBUG(errs() << "  "
+                   << (Opcode == OPC_MorphNodeTo ?
"Morphed" : "Created") + << " node: "; Res->dump(CurDAG); errs() << "\n"); + + // If this was a MorphNodeTo then we're completely done! + if (Opcode == OPC_MorphNodeTo) { + // Update chain and flag uses. + UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, + InputFlag, FlagResultNodesMatched, true); + return Res; + } + + continue; + } + + case OPC_MarkFlagResults: { + unsigned NumNodes = MatcherTable[MatcherIndex++]; + + // Read and remember all the flag-result nodes. + for (unsigned i = 0; i != NumNodes; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + } + continue; + } + + case OPC_CompleteMatch: { + // The match has been completed, and any new nodes (if any) have been + // created. Patch up references to the matched dag to use the newly + // created nodes. + unsigned NumResults = MatcherTable[MatcherIndex++]; + + for (unsigned i = 0; i != NumResults; ++i) { + unsigned ResSlot = MatcherTable[MatcherIndex++]; + if (ResSlot & 128) + ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); + + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Res = RecordedNodes[ResSlot]; + + assert(i < NodeToMatch->getNumValues() && + NodeToMatch->getValueType(i) != MVT::Other && + NodeToMatch->getValueType(i) != MVT::Flag && + "Invalid number of results to complete!"); + assert((NodeToMatch->getValueType(i) == Res.getValueType() || + NodeToMatch->getValueType(i) == MVT::iPTR || + Res.getValueType() == MVT::iPTR || + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueType().getSizeInBits()) && + "invalid replacement"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + } + + // If the root node defines a flag, add it to the flag nodes to update + // list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) + FlagResultNodesMatched.push_back(NodeToMatch); + + // Update chain and flag uses. + UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, + InputFlag, FlagResultNodesMatched, false); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); + + // FIXME: We just return here, which interacts correctly with SelectRoot + // above. We should fix this to not return an SDNode* anymore. + return 0; + } + } + + // If the code reached this point, then the match failed. See if there is + // another child to try in the current 'Scope', otherwise pop it until we + // find a case to check. + DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + ++NumDAGIselRetries; + while (1) { + if (MatchScopes.empty()) { + CannotYetSelect(NodeToMatch); + return 0; + } + + // Restore the interpreter state back to the point where the scope was + // formed. 
+ MatchScope &LastScope = MatchScopes.back(); + RecordedNodes.resize(LastScope.NumRecordedNodes); + NodeStack.clear(); + NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); + N = NodeStack.back(); + + if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) + MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); + MatcherIndex = LastScope.FailIndex; + + DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); + + InputChain = LastScope.InputChain; + InputFlag = LastScope.InputFlag; + if (!LastScope.HasChainNodesMatched) + ChainNodesMatched.clear(); + if (!LastScope.HasFlagResultNodesMatched) + FlagResultNodesMatched.clear(); + + // Check to see what the offset is at the new MatcherIndex. If it is zero + // we have reached the end of this scope, otherwise we have another child + // in the current scope to try. + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + + // If we have another child in this scope to match, update FailIndex and + // try it. + if (NumToSkip != 0) { + LastScope.FailIndex = MatcherIndex+NumToSkip; + break; + } + + // End of this scope, pop it and try the next child in the containing + // scope. + MatchScopes.pop_back(); + } + } +} + + + +void SelectionDAGISel::CannotYetSelect(SDNode *N) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot yet select: "; + + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } + report_fatal_error(Msg.str()); +} + +char SelectionDAGISel::ID = 0; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 0000000..8313de5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,301 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. 
+// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getEVTString(); + } + + template<typename EdgeIter> + static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method + /// is implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. + template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SDNode *Node, + const SelectionDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
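+    /// For example, the implementation below draws MVT::Flag edges in bold
+    /// red and chain (MVT::Other) edges as dashed blue.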
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+      SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+      EVT VT = Op.getValueType();
+      if (VT == MVT::Flag)
+        return "color=red,style=bold";
+      else if (VT == MVT::Other)
+        return "color=blue,style=dashed";
+      return "";
+    }
+
+
+    static std::string getSimpleNodeLabel(const SDNode *Node,
+                                          const SelectionDAG *G) {
+      std::string Result = Node->getOperationName(G);
+      {
+        raw_string_ostream OS(Result);
+        Node->print_details(OS, G);
+      }
+      return Result;
+    }
+    std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+    static std::string getNodeAttributes(const SDNode *N,
+                                         const SelectionDAG *Graph) {
+#ifndef NDEBUG
+      const std::string &Attrs = Graph->getGraphAttrs(N);
+      if (!Attrs.empty()) {
+        if (Attrs.find("shape=") == std::string::npos)
+          return std::string("shape=Mrecord,") + Attrs;
+        else
+          return Attrs;
+      }
+#endif
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(SelectionDAG *G,
+                                       GraphWriter<SelectionDAG*> &GW) {
+      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+      if (G->getRoot().getNode())
+        GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+                    "color=blue,style=dashed");
+    }
+  };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+                                                        const SelectionDAG *G) {
+  return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+            false, Title);
+#else
+  errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+         << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+  viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+  NodeGraphAttrs.clear();
+#else
+  errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (e.g. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = Attrs;
+#else
+  errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (e.g. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string>::const_iterator I =
+    NodeGraphAttrs.find(N);
+
+  if (I != NodeGraphAttrs.end())
+    return I->second;
+  else
+    return "";
+#else
+  errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+  return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + DEBUG(dbgs() << "setSubgraphColor hit max level\n"); + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*> + ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); + FlaggedNodes.pop_back(); + if (!FlaggedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. + GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 0000000..b74f600 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,2927 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace llvm { +TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (reloc == Reloc::PIC_) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} +} + +/// InitLibcallNames - Set default libcall names. +/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I8] = "__divqi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = 
"__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + 
Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + 
Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+  Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+  Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+  Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+  Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+  Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+  Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+  Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+  Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+    CCs[i] = CallingConv::C;
+  }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f32) {
+    if (RetVT == MVT::f64)
+      return FPEXT_F32_F64;
+  }
+
+  return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+  if (RetVT == MVT::f32) {
+    if (OpVT == MVT::f64)
+      return FPROUND_F64_F32;
+    if (OpVT == MVT::f80)
+      return FPROUND_F80_F32;
+    if (OpVT == MVT::ppcf128)
+      return FPROUND_PPCF128_F32;
+  } else if (RetVT == MVT::f64) {
+    if (OpVT == MVT::f80)
+      return FPROUND_F80_F64;
+    if (OpVT == MVT::ppcf128)
+      return FPROUND_PPCF128_F64;
+  }
+
+  return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
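+/// For example, getFPTOSINT(MVT::f64, MVT::i32) yields FPTOSINT_F64_I32,
+/// which the default name table above maps to "__fixdfsi".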
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
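+/// For example, getUINTTOFP(MVT::i32, MVT::f64) yields UINTTOFP_I32_F64,
+/// "__floatunsidf" in the default name table.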
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +/// NOTE: The constructor takes ownership of TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + + // These library functions default to expand. 
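+  // A target with native support for any of these can override the action in
+  // its own TargetLowering constructor, e.g.
+  //   setOperationAction(ISD::FEXP2, MVT::f32, Legal);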
+ setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10,MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10,MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::Latency; + JumpBufSize = 0; + JumpBufAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLowering::~TargetLowering() { + delete &TLOF; +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. 
+bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// hasLegalSuperRegRegClasses - Return true if the specified register class +/// has one or more super-reg register classes that are legal. +bool +TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{ + if (*RC->superregclasses_begin() == 0) + return false; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (isLegalRC(RRC)) + return true; + } + return false; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". +std::pair<const TargetRegisterClass*, uint8_t> +TargetLowering::findRepresentativeClass(EVT VT) const { + const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; + if (!RC) + return std::make_pair(RC, 0); + const TargetRegisterClass *BestRC = RC; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (RRC->isASubClass() || !isLegalRC(RRC)) + continue; + if (!hasLegalSuperRegRegClasses(RRC)) + return std::make_pair(RRC, 1); + BestRC = RRC; + } + return std::make_pair(BestRC, 1); +} + + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { + EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg; + if (!ExpandedVT.isInteger()) + break; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction(ExpandedVT, Expand); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + EVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, Promote); + } + } + + // ppcf128 type is really two f64's. 
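+  // When ppcf128 is not natively legal, expand it so that one ppcf128 value
+  // costs exactly twice what a single f64 costs in registers.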
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeSynthesizable(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ EVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // A representative register class is the largest legal register class for
+ // a group of value types, meaning one which is not a sub-register class of
+ // some other legal class. For example, on i386 the representative class
+ // for i8, i16, and i32 would be GR32; on x86_64 it is GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + const TargetRegisterClass* RRC; + uint8_t Cost; + tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + RepRegClassForVT[i] = RRC; + RepRegClassCostForVT[i] = Cost; + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + + +MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const { + return PointerTy.SimpleTy; +} + +MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { + return MVT::i32; // return the default value +} + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT) const { + unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // we should widen to that legal vector type. This handles things like + // <2 x float> -> <4 x float>. + if (NumElts != 1 && getTypeAction(VT) == Promote) { + RegisterVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterVT)) { + IntermediateVT = RegisterVT; + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. + EVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = getRegisterType(Context, NewVT); + RegisterVT = DestVT; + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. 
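+/// A sketch of the outcome (illustrative, assuming a 32-bit target where
+/// i64 is not legal): a function returning i64 produces two i32 OutputArg
+/// entries and, when Offsets is supplied, the offsets 0 and 4.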
+void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, + SmallVectorImpl<uint64_t> *Offsets) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + if (Offsets) { + Offsets->push_back(Offset); + Offset += PartSize; + } + } + } +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { + return TD->getCallFrameTypeAlignment(Ty); +} + +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned TargetLowering::getJumpTableEncoding() const { + // In non-pic modes, just use the address of a block. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return MachineJumpTableInfo::EK_BlockAddress; + + // In PIC mode, if the target supports a GPRel32 directive, use it. + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + return MachineJumpTableInfo::EK_GPRel32BlockAddress; + + // Otherwise, use a label difference. + return MachineJumpTableInfo::EK_LabelDifference32; +} + +SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + // If our PIC model is GP relative, use the global offset table as the base. + if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; +} + +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr * +TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI,MCContext &Ctx) const{ + // The normal PIC reloc base is the label at the start of the jump table. 
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+ // If the constant has any bits set outside the demanded mask, clear
+ // them and rebuild the node with the shrunken constant.
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
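+ // (Worked example on a hypothetical target: for an i32 add of which only
+ // the low 8 bits are demanded, SmallVT is i8 and the nodes built below
+ // form (zext i32 (add i8 (trunc x), (trunc y))).)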
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDValue Op, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned BitWidth = DemandedMask.getBitWidth(); + assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && + "Mask size mismatches value type size!"); + APInt NewMask = DemandedMask; + DebugLoc dl = Op.getDebugLoc(); + + // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); + + // Other users may use these bits. + if (!Op.getNode()->hasOneUse()) { + if (Depth != 0) { + // If not at the root, Just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the NewMask to all bits. + NewMask = APInt::getAllOnesValue(BitWidth); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; + KnownZero = ~KnownOne & NewMask; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt LHSZero, LHSOne; + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, + LHSZero, LHSOne, Depth+1); + // If the LHS already has zeros where RHSC does, this and is dead. + if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant. 
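+ // (For instance, if the top 24 bits of the LHS are known zero, an AND
+ // with 0xFFFF can be shrunk to an AND with 0xFF.)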
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. 
+ // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known + if ((KnownOne & KnownOne2) == KnownOne) { + EVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + EVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. 
+ if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. + if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + if ((APInt::getHighBitsSet(BitWidth, + BitWidth - InnerVT.getSizeInBits()) & + DemandedMask) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, ShTy)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + } + + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. 
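+ // (Illustrative: for (srl x, 4) with only the low nibble demanded,
+ // NewMask << ShAmt demands 0xF0 of x, and the known bits computed below
+ // are shifted right by 4 to line up with the result.)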
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne.intersects(SignBit)) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getScalarType().getSizeInBits()); + + // If none of the extended bits are demanded, eliminate the sextinreg. + if ((NewBits & NewMask) == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); + InSignBit.zext(BitWidth); + APInt InputDemandedBits = + APInt::getLowBitsSet(BitWidth, + EVT.getScalarType().getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. 
+ if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. + if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt TruncMask = NewMask; + TruncMask.zext(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. 
+ if (Op.getOperand(0).getNode()->hasOneUse()) { + SDValue In = Op.getOperand(0); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (!ShAmt) + break; + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + // Demand all the bits of the input that are demanded in the output. + // The low bits are obvious; the high bits are demanded because we're + // asserting that they're zero here. + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + KnownZero |= ~InMask & NewMask; + break; + } + case ISD::BIT_CONVERT: +#if 0 + // If this is an FP->Int bitcast and if the sign bit is the only thing that + // is demanded, turn this into a FGETSIGN. + if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && + MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && + !MVT::isVector(Op.getOperand(0).getValueType())) { + // Only do this xform if FGETSIGN is valid or if before legalize. + if (!TLO.AfterLegalize || + isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + Op.getOperand(0)); + unsigned ShVal = Op.getValueType().getSizeInBits()-1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + Sign, ShAmt)); + } + } +#endif + break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH + default: + // Just use ComputeMaskedBits to compute output bits. + TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. 
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + + return false; +} + +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); +} + +/// ComputeNumSignBitsForTargetNode - This method can be implemented by +/// targets that want to expose additional information about sign bits to the +/// DAG Combiner. +unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly +/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// determine which bit is set. +/// +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { + // A left-shift of a constant one will have exactly one bit set, because + // shifting the bit off the end is undefined. + if (Val.getOpcode() == ISD::SHL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue() == 1) + return true; + + // Similarly, a right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue().isSignBit()) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to ComputeMaskedBits to catch other known cases. + EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// and cc. If it is unable to simplify it, return a null SDValue. +SDValue +TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, DebugLoc dl) const { + SelectionDAG &DAG = DCI.DAG; + LLVMContext &Context = *DAG.getContext(); + + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return DAG.getConstant(1, VT); + } + + if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. 
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + const APInt &ShAmt + = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + APInt bestMask; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed()) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned maskWidth = origWidth; + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... + if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + const APInt &Mask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + APInt newMask = APInt::getLowBitsSet(maskWidth, width); + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask.lshr(offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask = newMask << width; + } + } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(Context, bestWidth); + if (newVT.isRound()) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask.trunc(bestWidth), + newVT)), + DAG.getConstant(0LL, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. 
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + getCondCodeAction(Cond, newVT)==Legal)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), newVT), + Cond); + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the constant doesn't fit into the number of bits for the source of + // the sign extension, it is impossible for both sides to be equal. + if (C1.getMinSignedBits() > ExtSrcTyBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); + + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + if (TrueWhenTrue) + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. 
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents() == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ } else if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy();
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
+ }
+ }
+ }
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT); + case 2: // Undefined. + return DAG.getUNDEF(VT); + } + } + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); + } else { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + } + } + } + } + + if (N0 == N1) { + // We can always fold X == X for integer setcc's. + if (N0.getValueType().isInteger()) + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); + if (UOF == 2) // FP operators that are undefined on NaNs. + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) + return DAG.getConstant(UOF, VT); + // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO + // if it is not already. + ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; + if (NewCond != Cond) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); + } + + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getValueType().isInteger()) { + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || + N0.getOpcode() == ISD::XOR) { + // Simplify (X+Y) == (X+Z) --> Y == Z + if (N0.getOpcode() == N1.getOpcode()) { + if (N0.getOperand(0) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); + if (N0.getOperand(1) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + if (DAG.isCommutativeBinOp(N0.getOpcode())) { + // If X op Y == Y op X, try other combinations. 
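+ // (E.g. (X+Y) == (Z+X) simplifies to Y == Z by matching the operands
+ // crosswise below.)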
+ if (N0.getOperand(0) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + Cond); + if (N0.getOperand(1) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + Cond); + } + } + + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue()- + LHSR->getAPIntValue(), + N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. + if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + } + } + + // Simplify (X+Z) == X --> Z == 0 + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), + N1, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) { + return DAG.getSetCC(dl, VT, N1.getOperand(1), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) { + return DAG.getSetCC(dl, VT, N1.getOperand(0), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getNode()->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + // X == (Z-X) --> X<<1 == Z + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); + } + } + } + + // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. 
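+ // (Concretely, with y == 0: "x&y == y" reads 0 == 0 and is true, while
+ // "x&y != 0" is false.)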
+ if (N0.getOpcode() == ISD::AND) + if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } + } + if (N1.getOpcode() == ISD::AND) + if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } + } + } + + // Fold away ALL boolean setcc's. + SDValue Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNOT(dl, Temp, MVT::i1); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the +/// node is a GlobalAddress + offset. +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, + int64_t &Offset) const { + if (isa<GlobalAddressSDNode>(N)) { + GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } + } + return false; +} + + +SDValue TargetLowering:: +PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. 
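An aside on the i1 boolean-setcc table above (the `return SDValue();` that follows simply completes the default, no-op PerformDAGCombine): in i1 the only values are 0 and 1, and the "signed" interpretation treats the set bit as -1, which is why SETGT and SETULT share one rewrite. A quick exhaustive check, with names chosen here for illustration:

#include <cassert>

// i1 interpreted as signed has values {0, -1}; bit 1 encodes -1.
static int asSigned(bool B) { return B ? -1 : 0; }

int main() {
  for (int X = 0; X < 2; ++X)
    for (int Y = 0; Y < 2; ++Y) {
      // SETGT on i1: X >s Y  <=>  X == 0 && Y == 1  <=>  ~X & Y.
      assert((asSigned(X) > asSigned(Y)) == (!X && Y));
      // SETULT on i1: X <u Y has the same truth table.
      assert(((unsigned)X < (unsigned)Y) == (!X && Y));
    }
  return 0;
}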
+ return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembler Implementation Methods +//===----------------------------------------------------------------------===// + + +TargetLowering::ConstraintType +TargetLowering::getConstraintType(const std::string &Constraint) const { + // FIXME: lots more standard ones to handle. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'r': return C_RegisterClass; + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable + return C_Memory; + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': // Relocatable Constant + case 'X': // Allow ANY value. + case 'I': // Target registers. + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + return C_Other; + } + } + + if (Constraint.size() > 1 && Constraint[0] == '{' && + Constraint[Constraint.size()-1] == '}') + return C_Register; + return C_Unknown; +} + +/// LowerXConstraint - try to replace an X constraint, which matches anything, +/// with another that has more specific requirements based on the type of the +/// corresponding operand. +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ + if (ConstraintVT.isInteger()) + return "r"; + if (ConstraintVT.isFloatingPoint()) + return "f"; // works for many targets + return 0; +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + switch (ConstraintLetter) { + default: break; + case 'X': // Allows any operand; labels (basic block) use this. + if (Op.getOpcode() == ISD::BasicBlock) { + Ops.push_back(Op); + return; + } + // fall through + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': { // Relocatable Constant + // These operands are interested in values of the form (GV+C), where C may + // be folded in as an offset of GV, or it may be explicitly added. Also, it + // is possible and fine if either GV or C are missing. + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + + // If we have "(add GV, C)", pull out GV/C + if (Op.getOpcode() == ISD::ADD) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); + if (C == 0 || GA == 0) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); + } + if (C == 0 || GA == 0) + C = 0, GA = 0; + } + + // If we find a valid operand, map to the TargetXXX version so that the + // value itself doesn't get selected. + if (GA) { // Either &GV or &GV+C + if (ConstraintLetter != 'n') { + int64_t Offs = GA->getOffset(); + if (C) Offs += C->getZExtValue(); + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + C ? C->getDebugLoc() : DebugLoc(), + Op.getValueType(), Offs)); + return; + } + } + if (C) { // just C, no GV. + // Simple constants are not allowed for 's'. + if (ConstraintLetter != 's') { + // gcc prints these as sign extended. Sign extend value to 64 bits + // now; without this it would get ZExt'd later in + // ScheduleDAGSDNodes::EmitNode, which is very generic. 
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I)))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: llvm_unreachable("Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, choosing the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory.
Because of +/// this the heuristic we use is: +/// +/// 1) If there is an 'other' constraint, and if the operand is valid for +/// that constraint, use it. This makes us take advantage of 'i' +/// constraints when available. +/// 2) Otherwise, pick the most general constraint present. This prefers +/// 'm' over 'r', for example. +/// +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + const TargetLowering &TLI, + SDValue Op, SelectionDAG *DAG) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI.getConstraintType(OpInfo.Codes[i]); + + // If this is an 'other' constraint, see if the operand is valid for it. + // For example, on X86 we might have an 'rI' constraint. If the operand + // is an integer in the range [0..31] we want to use I (saving a load + // of a register), otherwise we must use 'r'. + if (CType == TargetLowering::C_Other && Op.getNode()) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + std::vector<SDValue> ResultOps; + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], + ResultOps, *DAG); + if (!ResultOps.empty()) { + BestType = CType; + BestIdx = i; + break; + } + } + + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +/// ComputeConstraintToUse - Determines the constraint code and constraint +/// type to use for the specific AsmOperandInfo, setting +/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + SelectionDAG *DAG) const { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. + if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } else { + ChooseConstraint(OpInfo, *this, Op, DAG); + } + + // 'X' matches anything. + if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { + OpInfo.CallOperandVal = v; + return; + } + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. 
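A compact restatement of that two-step heuristic, with toy types standing in for TargetLowering::ConstraintType (everything here is the editor's illustration, not the LLVM API):

#include <vector>

enum Kind { Other, Register, RegisterClass, Memory };

static int generality(Kind K) {
  switch (K) {
  case Other:         return 0;   // immediates / magic values
  case Register:      return 1;   // one specific register
  case RegisterClass: return 2;   // a group of registers
  case Memory:        return 3;   // most general
  }
  return 0;
}

// Step 1: an 'Other' code that the operand actually satisfies wins outright
// (e.g. the "I" in "rI" when the operand is a small constant).
// Step 2: otherwise take the most general remaining code.
static unsigned choose(const std::vector<Kind> &Codes,
                       const std::vector<bool> &OperandFitsOther) {
  unsigned Best = 0;
  int BestGen = -1;
  for (unsigned I = 0; I != Codes.size(); ++I) {
    if (Codes[I] == Other && OperandFitsOther[I])
      return I;
    int G = generality(Codes[I]);
    if (G > BestGen) { Best = I; BestGen = G; }
  }
  return Best;
}

int main() {
  // "imr" with no immediate that fits: memory (generality 3) is chosen.
  std::vector<Kind> Codes = { Other, Memory, RegisterClass };
  std::vector<bool> Fits  = { false, false, false };
  return choose(Codes, Fits) == 1 ? 0 : 1;
}

Returning to ComputeConstraintToUse, the lookup below consults LowerXConstraint for a replacement code.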
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation accepts only a conservative, RISC-style
+ // addressing mode: r+r or r+i.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default: // Don't allow n*r for any larger scale.
+ return false;
+ }
+
+ return true;
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+ APInt::mu magics = N1C->getAPIntValue().magicu();
+
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a081e3c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
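An aside on the division-by-constant expansions above, before the TargetSelectionDAGInfo boilerplate continues: for 32-bit division by 7, the standard Hacker's Delight magic values are 0x92492493 with shift 2 (signed, where d > 0 and m < 0 triggers the add-numerator fixup) and 0x24924925 with add-indicator set and shift 3 (unsigned, which takes the NPQ path). The helpers below are the editor's illustration of the exact instruction sequences BuildSDIV and BuildUDIV emit for this case, not code from this tree; arithmetic right shift of a negative value is assumed (true on all mainstream compilers).

#include <cassert>
#include <cstdint>

// BuildSDIV's sequence for x / 7 on i32: MULHS, ADD of the numerator,
// SRA by s = 2, then add the sign bit.
static int32_t sdiv7(int32_t X) {
  int32_t Q = (int32_t)(((int64_t)X * (int32_t)0x92492493) >> 32); // MULHS
  Q += X;                             // d > 0, m < 0: add numerator
  Q >>= 2;                            // SRA by magics.s
  Q += (int32_t)((uint32_t)Q >> 31);  // add the extracted sign bit
  return Q;
}

// BuildUDIV's sequence for x / 7 on u32: magics.a != 0, so the fixup path
// computes NPQ = x - Q, halves it, adds Q back, and shifts by s - 1.
static uint32_t udiv7(uint32_t X) {
  uint32_t Q = (uint32_t)(((uint64_t)X * 0x24924925u) >> 32); // MULHU
  uint32_t NPQ = X - Q;
  NPQ >>= 1;
  NPQ += Q;
  return NPQ >> 2;                    // SRL by magics.s - 1
}

int main() {
  for (int32_t X : { -100, -7, -6, -1, 0, 1, 6, 7, 100 })
    assert(sdiv7(X) == X / 7);
  for (uint32_t X : { 0u, 6u, 7u, 100u, 0xFFFFFFFFu })
    assert(udiv7(X) == X / 7u);
  return 0;
}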
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getTargetData()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..6ab0cb0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,451 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// runtime/GC/SemiSpace.cpp contains a prototype runtime which is compatible
+// with ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ const StructType *StackEntryTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+ // FALL THROUGH: state 1 resumes the scan that this state set up.
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(),
+ CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
+ NewBB, CleanupBB,
+ Args.begin(), Args.end(),
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ const Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
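An aside on the resumable-enumerator pattern above: state lives in members, and Next() re-enters the switch to resume an interrupted loop, so iteration never allocates. A minimal standalone sketch of the same shape (class and names are the editor's, for illustration only):

#include <cstdio>

class RangeEnumerator {
  int State = 0;
  int Cur = 0, End = 0;
public:
  explicit RangeEnumerator(int N) : End(N) {}
  const int *Next() {            // returns nullptr when exhausted
    switch (State) {
    default:
      return nullptr;
    case 0:
      Cur = 0;
      State = 1;
      // FALL THROUGH, exactly as state 0 falls into state 1 above.
    case 1:
      if (Cur < End) {
        ++Cur;
        return &Cur;             // "yield" the current value
      }
      State = 2;
      return nullptr;
    }
  }
};

int main() {
  RangeEnumerator E(3);
  while (const int *V = E.Next())
    std::printf("%d\n", *V);     // prints 1 2 3
  return 0;
}

Returning to GetFrameMap, the loop below scans the collected roots and counts trailing null metadata.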
+ unsigned NumMeta = 0; + SmallVector<Constant*,16> Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + + Constant *BaseElts[] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(F.getContext(), BaseElts, 2, false), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), + Metadata.begin(), NumMeta) + }; + + Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, + false); + + std::string TypeName("gc_map."); + TypeName += utostr(NumMeta); + F.getParent()->addTypeName(TypeName, FrameMap->getType()); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). + Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, + FrameMap, "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) + }; + return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); +} + +const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector<const Type*> EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + Type *Ty = StructType::get(F.getContext(), EltTys); + + std::string TypeName("gc_stackentry."); + TypeName += F.getName(); + F.getParent()->addTypeName(TypeName, Ty); + + return Ty; +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGC::initializeCustomLowering(Module &M) { + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector<const Type*> EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); + M.addTypeName("gc_map", FrameMapTy); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). 
+ // }; + OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(FrameMapPtrTy); + PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); + + RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); + StackEntryTy = cast<StructType>(LinkTyH.get()); + const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from + // a FunctionPass? + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable(M, StackEntryPtrTy, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), + "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGC::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGC::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. + + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst*, AllocaInst*> Pair = std::make_pair( + CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, + int Idx, int Idx2, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; + Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx) }; + Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGC::performCustomLowering(Function &F) { + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. 
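Before CollectRoots is invoked below, a concrete picture of the runtime layout that initializeCustomLowering above just described in IR types. The structs mirror the FrameMap/StackEntry comments; the visitor and its names are the editor's illustration (a similar walker appears in the LLVM GC documentation), and the flexible array members are a C idiom that GCC and Clang also accept in C++:

#include <cstdint>

struct FrameMap {
  int32_t NumRoots;      // number of roots in the stack frame
  int32_t NumMeta;       // number of metadata entries (may be < NumRoots)
  const void *Meta[];    // metadata for the first NumMeta roots
};

struct StackEntry {
  StackEntry *Next;      // caller's stack entry
  const FrameMap *Map;   // constant frame map for this frame
  void *Roots[];         // the co-allocated root slots
};

// In a real binary this symbol comes from the compiled code (the pass
// creates it with linkonce linkage); defined null here so the sketch links.
extern "C" { StackEntry *llvm_gc_root_chain = nullptr; }

// How a collector would visit every live root (illustrative only). Roots
// with metadata come first because CollectRoots numbers them first.
void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
  for (StackEntry *E = llvm_gc_root_chain; E; E = E->Next) {
    int32_t I = 0;
    for (; I < E->Map->NumMeta; ++I)
      Visitor(&E->Roots[I], E->Map->Meta[I]);
    for (; I < E->Map->NumRoots; ++I)
      Visitor(&E->Roots[I], nullptr);
  }
}

The call that opens performCustomLowering below gathers exactly the llvm.gcroot intrinsics this layout describes.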
+ CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, + "gc_frame"); + + while (isa<AllocaInst>(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, + 0,1,"gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. + AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa<StoreInst>(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, + StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. + Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, + "gc_frame.next"); + Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + AtExit->CreateStore(SavedHead, Head); + } + + // Delete the original allocas (which are no longer used) and the intrinsic + // calls (which are no longer valid). Doing this last avoids invalidating + // iterators. + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + Roots[I].first->eraseFromParent(); + Roots[I].second->eraseFromParent(); + } + + Roots.clear(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp new file mode 100644 index 0000000..aeaa38b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp @@ -0,0 +1,1152 @@ +//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop, the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
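A quick usage note, assuming an llc built from a tree containing this pass: shrink wrapping is off by default and is driven entirely by the options declared above, with the debug levels supplied by the enum that opens the next hunk. For example:

  llc -shrink-wrap -shrink-wrap-dbg=Iterations test.ll
  llc -shrink-wrap-func=main -shrink-wrap-dbg=Details test.ll

The flag and level names come straight from the cl::opt declarations; the input file name is illustrative.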
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getNameStr();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save/restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ dbgs() << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn != AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut != AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets of anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB); + } + } + + DEBUG({ + if (ShrinkWrapDebugging >= Details) { + dbgs() + << "-----------------------------------------------------------\n" + << " Antic/Avail Sets:\n" + << "-----------------------------------------------------------\n" + << "iterations = " << iterations << "\n" + << "-----------------------------------------------------------\n" + << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" + << "-----------------------------------------------------------\n"; + + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + + dbgs() + << "-----------------------------------------------------------\n"; + } + }); +} + +/// propagateUsesAroundLoop - copy used register info from MBB to all blocks +/// of the loop given by LP and its parent loops. This prevents spills/restores +/// from being placed in the bodies of loops. +/// +void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { + if (! MBB || !LP) + return; + + std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); + for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { + MachineBasicBlock* LBB = loopBlocks[i]; + if (LBB == MBB) + continue; + if (CSRUsed[LBB].contains(CSRUsed[MBB])) + continue; + CSRUsed[LBB] |= CSRUsed[MBB]; + } +} + +/// calculateSets - collect the CSRs used in this function, compute +/// the DF sets that describe the initial minimal regions in the +/// Machine CFG around which CSR spills and restores must be placed. +/// +/// Additionally, this function decides if shrink wrapping should +/// be disabled for the current function, checking the following: +/// 1. the current function has more than 500 MBBs: heuristic limit +/// on function size to reduce compile time impact of the current +/// iterative algorithm. +/// 2. all CSRs are used in the entry block. +/// 3. all CSRs are used in all immediate successors of the entry block. +/// 4. all CSRs are used in a subset of blocks, each of which dominates +/// all return blocks. These blocks, taken as a subgraph of the MCFG, +/// are equivalent to the entry block since all execution paths pass +/// through them. +/// +bool PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector<CalleeSavedInfo> CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) { + DEBUG(if (ShrinkWrapThisFunction) + dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); + return false; + } + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + // Determine if this function has fast exit paths. + DEBUG(if (ShrinkWrapThisFunction) + findFastExitPath()); + + // Limit shrink wrapping via the current iterative bit vector + // implementation to functions with <= 500 MBBs. + if (Fn.size() > 500) { + DEBUG(if (ShrinkWrapThisFunction) + dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); + ShrinkWrapThisFunction = false; + } + + // Return now if not shrink wrapping. + if (! ShrinkWrapThisFunction) + return false; + + // Collect set of used CSRs. 
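An editor's restatement of the fixed-point system that calcAnticInOut and calcAvailInOut solve above, over a toy CFG (all names here are illustrative; the real code also skips self-loop edges, omitted for brevity):

#include <bitset>
#include <vector>

using RegSet = std::bitset<32>;
struct Block { std::vector<int> Succs, Preds; };

// AnticOut[B] = INTERSECT(AnticIn[S]) over successors S
// AnticIn[B]  = Used[B] | AnticOut[B]
// AvailIn[B]  = INTERSECT(AvailOut[P]) over predecessors P
// AvailOut[B] = Used[B] | AvailIn[B]
// Iterated from empty sets until nothing changes (Kleene iteration).
void solve(const std::vector<Block> &CFG, const std::vector<RegSet> &Used,
           std::vector<RegSet> &AnticIn, std::vector<RegSet> &AnticOut,
           std::vector<RegSet> &AvailIn, std::vector<RegSet> &AvailOut) {
  const unsigned N = CFG.size();
  AnticIn.assign(N, RegSet()); AnticOut.assign(N, RegSet());
  AvailIn.assign(N, RegSet()); AvailOut.assign(N, RegSet());
  bool changed = true;
  while (changed) {
    changed = false;
    for (unsigned B = 0; B != N; ++B) {
      RegSet AO;                     // stays empty for exit blocks
      if (!CFG[B].Succs.empty()) {
        AO.set();
        for (int S : CFG[B].Succs) AO &= AnticIn[S];
      }
      RegSet AI = Used[B] | AO;
      RegSet VI;                     // stays empty for the entry block
      if (!CFG[B].Preds.empty()) {
        VI.set();
        for (int P : CFG[B].Preds) VI &= AvailOut[P];
      }
      RegSet VO = Used[B] | VI;
      if (AO != AnticOut[B] || AI != AnticIn[B] ||
          VI != AvailIn[B] || VO != AvailOut[B])
        changed = true;
      AnticOut[B] = AO; AnticIn[B] = AI; AvailIn[B] = VI; AvailOut[B] = VO;
    }
  }
}

int main() {
  // Diamond: 0=Entry -> {1,2} -> 3=Ret; CSR #0 used only in block 1.
  std::vector<Block> CFG = { {{1, 2}, {}}, {{3}, {0}}, {{3}, {0}}, {{}, {1, 2}} };
  std::vector<RegSet> Used(4);
  Used[1].set(0);
  std::vector<RegSet> AnticIn, AnticOut, AvailIn, AvailOut;
  solve(CFG, Used, AnticIn, AnticOut, AvailIn, AvailOut);
  // The CSR is anticipated into block 1 only, so a spill can sink out of Entry.
  return (AnticIn[1].test(0) && !AnticIn[0].test(0)) ? 0 : 1;
}

Back in calculateSets, the loop below simply records every CSR index from the callee-saved info as used.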
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + UsedCSRegs.set(inx); + } + + // Walk instructions in all MBBs, create CSRUsed[] sets, choose + // whether or not to shrink wrap this function. + MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + bool allCSRUsesInEntryBlock = true; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + unsigned Reg = CSI[inx].getReg(); + // If instruction I reads or modifies Reg, add it to UsedCSRegs, + // CSRUsed map for the current block. + for (unsigned opInx = 0, opEnd = I->getNumOperands(); + opInx != opEnd; ++opInx) { + const MachineOperand &MO = I->getOperand(opInx); + if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(Reg, MOReg))) { + // CSR Reg is defined/used in block MBB. + CSRUsed[MBB].set(inx); + // Check for uses in EntryBlock. + if (MBB != EntryBlock) + allCSRUsesInEntryBlock = false; + } + } + } + } + + if (CSRUsed[MBB].empty()) + continue; + + // Propagate CSRUsed[MBB] in loops + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + // Add top level loop to work list. + MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); + MachineLoop* PLP = getTopLevelLoopParent(LP); + + if (! HDR) { + HDR = PLP->getHeader(); + assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); + MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); + HDR = *PI; + } + TLLoops[HDR] = PLP; + + // Push uses from inside loop to its parent loops, + // or to all other MBBs in its loop. + if (LP->getLoopDepth() > 1) { + for (MachineLoop* PLP = LP->getParentLoop(); PLP; + PLP = PLP->getParentLoop()) { + propagateUsesAroundLoop(MBB, PLP); + } + } else { + propagateUsesAroundLoop(MBB, LP); + } + } + } + + if (allCSRUsesInEntryBlock) { + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); + ShrinkWrapThisFunction = false; + } else { + bool allCSRsUsedInEntryFanout = true; + for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), + SE = EntryBlock->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (CSRUsed[SUCC] != UsedCSRegs) + allCSRsUsedInEntryFanout = false; + } + if (allCSRsUsedInEntryFanout) { + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); + ShrinkWrapThisFunction = false; + } + } + + if (ShrinkWrapThisFunction) { + // Check if MBB uses CSRs and dominates all exit nodes. + // Such nodes are equiv. to the entry node w.r.t. + // CSR uses: every path through the function must + // pass through this node. If each CSR is used at least + // once by these nodes, shrink wrapping is disabled. + CSRegSet CSRUsedInChokePoints; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) + continue; + bool dominatesExitNodes = true; + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + if (! 
DT.dominates(MBB, ReturnBlocks[ri])) { + dominatesExitNodes = false; + break; + } + if (dominatesExitNodes) { + CSRUsedInChokePoints |= CSRUsed[MBB]; + if (CSRUsedInChokePoints == UsedCSRegs) { + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); + ShrinkWrapThisFunction = false; + break; + } + } + } + } + + // Return now if we have decided not to apply shrink wrapping + // to the current function. + if (! ShrinkWrapThisFunction) + return false; + + DEBUG({ + dbgs() << "ENABLED: " << Fn.getFunction()->getName(); + if (HasFastExitPath) + dbgs() << " (fast exit path)"; + dbgs() << "\n"; + if (ShrinkWrapDebugging >= BasicInfo) { + dbgs() << "------------------------------" + << "-----------------------------\n"; + dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + if (ShrinkWrapDebugging >= Details) { + dbgs() << "------------------------------" + << "-----------------------------\n"; + dumpAllUsed(); + } + } + }); + + // Build initial DF sets to determine minimal regions in the + // Machine CFG around which CSRs must be spilled and restored. + calculateAnticAvail(Fn); + + return true; +} + +/// addUsesForMEMERegion - add uses of CSRs spilled or restored in +/// multi-entry, multi-exit (MEME) regions so spill and restore +/// placement will not break code that enters or leaves a +/// shrink-wrapped region by inducing spills with no matching +/// restores or restores with no matching spills. A MEME region +/// is a subgraph of the MCFG with multiple entry edges, multiple +/// exit edges, or both. This code propagates use information +/// through the MCFG until all paths requiring spills and restores +/// _outside_ the computed minimal placement regions have been covered. +/// +bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, + SmallVector<MachineBasicBlock*, 4>& blks) { + if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { + bool processThisBlock = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC->pred_size() > 1) { + processThisBlock = true; + break; + } + } + if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED->succ_size() > 1) { + processThisBlock = true; + break; + } + } + } + if (! processThisBlock) + return false; + } + + CSRegSet prop; + if (!CSRSave[MBB].empty()) + prop = CSRSave[MBB]; + else if (!CSRRestore[MBB].empty()) + prop = CSRRestore[MBB]; + else + prop = CSRUsed[MBB]; + if (prop.empty()) + return false; + + // Propagate selected bits to successors, predecessors of MBB. + bool addedUses = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + // Self-loop + if (SUCC == MBB) + continue; + if (! CSRUsed[SUCC].contains(prop)) { + CSRUsed[SUCC] |= prop; + addedUses = true; + blks.push_back(SUCC); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "successor " << getBasicBlockName(SUCC) << "\n"); + } + } + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + // Self-loop + if (PRED == MBB) + continue; + if (! 
CSRUsed[PRED].contains(prop)) { + CSRUsed[PRED] |= prop; + addedUses = true; + blks.push_back(PRED); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "predecessor " << getBasicBlockName(PRED) << "\n"); + } + } + return addedUses; +} + +/// addUsesForTopLevelLoops - add uses for CSRs used inside top +/// level loops to the exit blocks of those loops. +/// +bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { + bool addedUses = false; + + // Place restores for top level loops where needed. + for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator + I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { + MachineBasicBlock* MBB = I->first; + MachineLoop* LP = I->second; + MachineBasicBlock* HDR = LP->getHeader(); + SmallVector<MachineBasicBlock*, 4> exitBlocks; + CSRegSet loopSpills; + + loopSpills = CSRSave[MBB]; + if (CSRSave[MBB].empty()) { + loopSpills = CSRUsed[HDR]; + assert(!loopSpills.empty() && "No CSRs used in loop?"); + } else if (CSRRestore[MBB].contains(CSRSave[MBB])) + continue; + + LP->getExitBlocks(exitBlocks); + assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); + for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { + MachineBasicBlock* EXB = exitBlocks[i]; + if (! CSRUsed[EXB].contains(loopSpills)) { + CSRUsed[EXB] |= loopSpills; + addedUses = true; + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << "LOOP " << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(loopSpills) << ")->" + << getBasicBlockName(EXB) << "\n"); + if (EXB->succ_size() > 1 || EXB->pred_size() > 1) + blks.push_back(EXB); + } + } + } + return addedUses; +} + +/// calcSpillPlacements - determine which CSRs should be spilled +/// in MBB using AnticIn sets of MBB's predecessors, keeping track +/// of changes to spilled reg sets. Add MBB to the set of blocks +/// that need to be processed for propagating use info to cover +/// multi-entry/exit regions. +/// +bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, + SmallVector<MachineBasicBlock*, 4> &blks, + CSRegBlockMap &prevSpills) { + bool placedSpills = false; + // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) + CSRegSet anticInPreds; + SmallVector<MachineBasicBlock*, 4> predecessors; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED != MBB) + predecessors.push_back(PRED); + } + unsigned i = 0, e = predecessors.size(); + if (i != e) { + MachineBasicBlock* PRED = predecessors[i]; + anticInPreds = UsedCSRegs - AnticIn[PRED]; + for (++i; i != e; ++i) { + PRED = predecessors[i]; + anticInPreds &= (UsedCSRegs - AnticIn[PRED]); + } + } else { + // Handle uses in entry blocks (which have no predecessors). + // This is necessary because the DFA formulation assumes the + // entry and (multiple) exit nodes cannot have CSR uses, which + // is not the case in the real world. + anticInPreds = UsedCSRegs; + } + // Compute spills required at MBB: + CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; + + if (! CSRSave[MBB].empty()) { + if (MBB == EntryBlock) { + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; + } else { + // Reset all regs spilled in MBB that are also spilled in EntryBlock. 
+      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+      }
+    }
+  }
+  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+  prevSpills[MBB] = CSRSave[MBB];
+  // Remember this block for adding restores to successor
+  // blocks for multi-entry region.
+  if (placedSpills)
+    blks.push_back(MBB);
+
+  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+  return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+                                SmallVector<MachineBasicBlock*, 4> &blks,
+                                CSRegBlockMap &prevRestores) {
+  bool placedRestores = false;
+  // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+  CSRegSet availOutSucc;
+  SmallVector<MachineBasicBlock*, 4> successors;
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+    if (SUCC != MBB)
+      successors.push_back(SUCC);
+  }
+  unsigned i = 0, e = successors.size();
+  if (i != e) {
+    MachineBasicBlock* SUCC = successors[i];
+    availOutSucc = UsedCSRegs - AvailOut[SUCC];
+    for (++i; i != e; ++i) {
+      SUCC = successors[i];
+      availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+    }
+  } else {
+    if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+      // Handle uses in return blocks (which have no successors).
+      // This is necessary because the DFA formulation assumes the
+      // entry and (multiple) exit nodes cannot have CSR uses, which
+      // is not the case in the real world.
+      availOutSucc = UsedCSRegs;
+    }
+  }
+  // Compute restores required at MBB:
+  CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+  // Postprocess restore placements at MBB.
+  // Remove the CSRs that are restored in the return blocks.
+  // Lest this be confusing, note that:
+  //   CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+  if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+    if (! CSRSave[EntryBlock].empty())
+      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+  }
+  placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+  prevRestores[MBB] = CSRRestore[MBB];
+  // Remember this block for adding saves to predecessor
+  // blocks for multi-entry region.
+  if (placedRestores)
+    blks.push_back(MBB);
+
+  DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+          dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+  return placedRestores;
+}
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+  CSRegBlockMap prevCSRSave;
+  CSRegBlockMap prevCSRRestore;
+  SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+  bool changed = true;
+  unsigned iterations = 0;
+
+  // Iterate computation of spill and restore placements in the MCFG until:
+  //   1. CSR use info has been fully propagated around the MCFG, and
+  //   2. computations of CSRSave[] and CSRRestore[] reach fixed points.
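
The two placement computations above are pure set algebra, and the loop that follows just iterates them to a fixed point. As a model of the spill side only (names invented, std::bitset in place of CSRegSet; with bitsets, the subtraction A - B is written A & ~B):

// Illustrative sketch only (not from the LLVM sources): the set algebra
// of calcSpillPlacements on std::bitset. All names are invented.
#include <bitset>
#include <cassert>
#include <vector>

using RegSet = std::bitset<8>;

// CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB])
//                 & Intersect over preds P of (UsedCSRegs - AnticIn[P])
static RegSet spillsAt(const RegSet &anticIn, const RegSet &availIn,
                       const RegSet &usedCSRegs,
                       const std::vector<RegSet> &predAnticIn) {
  RegSet anticInPreds = usedCSRegs;   // entry block: no predecessors
  for (const RegSet &p : predAnticIn)
    anticInPreds &= usedCSRegs & ~p;
  return (anticIn & ~availIn) & anticInPreds;
}

int main() {
  RegSet used("00001111"), anticIn("00000011"), availIn("00000001");
  std::vector<RegSet> preds = {RegSet("00000000")}; // nothing anticipated
  // Only CSR #1 is anticipated here but not already available: spill it.
  assert(spillsAt(anticIn, availIn, used, preds) == RegSet("00000010"));
}
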
+  while (changed) {
+    changed = false;
+    ++iterations;
+
+    DEBUG(if (ShrinkWrapDebugging >= Iterations)
+            dbgs() << "iter " << iterations
+                   << " --------------------------------------------------\n");
+
+    // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+    // which determines the placements of spills and restores.
+    // Keep track of changes to spills, restores in each iteration to
+    // minimize the total iterations.
+    bool SRChanged = false;
+    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+         MBBI != MBBE; ++MBBI) {
+      MachineBasicBlock* MBB = MBBI;
+
+      // Place spills for CSRs in MBB.
+      SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+      // Place restores for CSRs in MBB.
+      SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+    }
+
+    // Add uses of CSRs used inside loops where needed.
+    changed |= addUsesForTopLevelLoops(cvBlocks);
+
+    // Add uses for CSRs spilled or restored at branch, join points.
+    if (changed || SRChanged) {
+      while (! cvBlocks.empty()) {
+        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+      }
+      if (! ncvBlocks.empty()) {
+        cvBlocks = ncvBlocks;
+        ncvBlocks.clear();
+      }
+    }
+
+    if (changed) {
+      calculateAnticAvail(Fn);
+      CSRSave.clear();
+      CSRRestore.clear();
+    }
+  }
+
+  // Check for effectiveness:
+  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+  // Gives a measure of how many CSR spills have been moved from EntryBlock
+  // to minimal regions enclosing their uses.
+  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+  numSRReduced += numSRReducedThisFunc;
+  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+      dbgs() << "-----------------------------------------------------------\n";
+      dbgs() << "total iterations = " << iterations << " ( "
+             << Fn.getFunction()->getName()
+             << " " << numSRReducedThisFunc
+             << " " << Fn.size()
+             << " )\n";
+      dbgs() << "-----------------------------------------------------------\n";
+      dumpSRSets();
+      dbgs() << "-----------------------------------------------------------\n";
+      if (numSRReducedThisFunc)
+        verifySpillRestorePlacement();
+    });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// that have at least one path from the entry block directly to a
+/// return block, or whose exit paths contain only a very small
+/// number of edges.
+///
+void PEI::findFastExitPath() {
+  if (! EntryBlock)
+    return;
+  // Find a path from EntryBlock to any return block that does not branch:
+  //        Entry
+  //          |     ...
+  //          v      |
+  //         B1<-----+
+  //          |
+  //          v
+  //       Return
+  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+
+    // Assume positive, disprove existence of fast path.
+    HasFastExitPath = true;
+
+    // Check the immediate successors.
+    if (isReturnBlock(SUCC)) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+               << "->" << getBasicBlockName(SUCC) << "\n";
+      break;
+    }
+    // Traverse df from SUCC, look for a branch block.
+    std::string exitPath = getBasicBlockName(SUCC);
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+           BE = df_end(SUCC); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      // Reject paths with branch nodes.
+      if (SBB->succ_size() > 1) {
+        HasFastExitPath = false;
+        break;
+      }
+      exitPath += "->" + getBasicBlockName(SBB);
+    }
+    if (HasFastExitPath) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+               << "->" << exitPath << "\n";
+      break;
+    }
+  }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+///         all CSRs spilled at MBB are restored on all paths
+///         from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+///           all CSRs restored at MBB are spilled on all paths
+///           reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+  unsigned numReturnBlocks = 0;
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+      ++numReturnBlocks;
+  }
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet spilled = BI->second;
+    CSRegSet restored;
+
+    if (spilled.empty())
+      continue;
+
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(spilled)
+                 << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+    if (CSRRestore[MBB].intersects(spilled)) {
+      restored |= (CSRRestore[MBB] & spilled);
+    }
+
+    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+    // we must find restores for all spills w/no intervening spills on all
+    // paths from MBB to all return blocks.
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+           BE = df_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      if (SBB == MBB)
+        continue;
+      // Stop when we encounter spills of any CSRs spilled at MBB that
+      // have not yet been seen to be restored.
+      if (CSRSave[SBB].intersects(spilled) &&
+          !restored.contains(CSRSave[SBB] & spilled))
+        break;
+      // Collect the CSRs spilled at MBB that are restored
+      // at this DF successor of MBB.
+      if (CSRRestore[SBB].intersects(spilled))
+        restored |= (CSRRestore[SBB] & spilled);
+      // If we are at a return block, check that the restores
+      // we have seen so far exhaust the spills at MBB, then
+      // reset the restores.
+      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+        if (restored != spilled) {
+          CSRegSet notRestored = (spilled - restored);
+          DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+                       << stringifyCSRegSet(notRestored)
+                       << " spilled at " << getBasicBlockName(MBB)
+                       << " are never restored on path to return "
+                       << getBasicBlockName(SBB) << "\n");
+        }
+        restored.clear();
+      }
+    }
+  }
+
+  // Check restore placements.
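
The spill half of the verification above can be modeled on a toy DAG: walking depth-first from a spill block, every path that reaches an exit must have seen a restore. A minimal sketch with invented names, using plain recursion in place of df_iterator (fine for an acyclic toy; a real walk needs a visited set):

// Illustrative sketch only (not from the LLVM sources): verify that one
// register spilled at block 0 is restored on every path to an exit.
#include <cstdio>
#include <vector>

struct Node { bool restores = false; std::vector<int> succs; };

static bool restoredOnAllPaths(const std::vector<Node> &g, int n,
                               bool seenRestore) {
  seenRestore |= g[n].restores;
  if (g[n].succs.empty())            // exit block reached
    return seenRestore;
  for (int s : g[n].succs)           // toy DAG: no visited set needed
    if (!restoredOnAllPaths(g, s, seenRestore))
      return false;
  return true;
}

int main() {
  std::vector<Node> g(3);
  g[0].succs = {1, 2};               // spill at 0; exits at 1 and 2
  g[1].restores = true;              // the path through 2 misses a restore
  std::printf("placement %s\n",
              restoredOnAllPaths(g, 0, false) ? "ok" : "broken");
}
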
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restored = BI->second; + CSRegSet spilled; + + if (restored.empty()) + continue; + + DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restored) << "\n"); + + if (CSRSave[MBB].intersects(restored)) { + spilled |= (CSRSave[MBB] & restored); + } + // Walk inverse depth first from MBB to find spills of all + // CSRs restored at MBB: + for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), + BE = idf_end(MBB); BI != BE; ++BI) { + MachineBasicBlock* PBB = *BI; + if (PBB == MBB) + continue; + // Stop when we encounter restores of any CSRs restored at MBB that + // have not yet been seen to be spilled. + if (CSRRestore[PBB].intersects(restored) && + !spilled.contains(CSRRestore[PBB] & restored)) + break; + // Collect the CSRs restored at MBB that are spilled + // at this DF predecessor of MBB. + if (CSRSave[PBB].intersects(restored)) + spilled |= (CSRSave[PBB] & restored); + } + if (spilled != restored) { + CSRegSet notSpilled = (restored - spilled); + DEBUG(dbgs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notSpilled) + << " restored at " << getBasicBlockName(MBB) + << " are never spilled\n"); + } + } +} + +// Debugging print methods. +std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + if (!MBB) + return ""; + + if (MBB->getBasicBlock()) + return MBB->getBasicBlock()->getNameStr(); + + std::ostringstream name; + name << "_MBB_" << MBB->getNumber(); + return name.str(); +} + +std::string PEI::stringifyCSRegSet(const CSRegSet& s) { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const std::vector<CalleeSavedInfo> CSI = + MF->getFrameInfo()->getCalleeSavedInfo(); + + std::ostringstream srep; + if (CSI.size() == 0) { + srep << "[]"; + return srep.str(); + } + srep << "["; + CSRegSet::iterator I = s.begin(), E = s.end(); + if (I != E) { + unsigned reg = CSI[*I].getReg(); + srep << TRI->getName(reg); + for (++I; I != E; ++I) { + reg = CSI[*I].getReg(); + srep << ","; + srep << TRI->getName(reg); + } + } + srep << "]"; + return srep.str(); +} + +void PEI::dumpSet(const CSRegSet& s) { + DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); +} + +void PEI::dumpUsed(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + }); +} + +void PEI::dumpAllUsed() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpUsed(MBB); + } +} + +void PEI::dumpSets(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + }); +} + +void PEI::dumpSets1(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 
+ }); +} + +void PEI::dumpAllSets() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets1(MBB); + } +} + +void PEI::dumpSRSets() { + DEBUG({ + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (!CSRSave[MBB].empty()) { + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + dbgs() << '\n'; + } + + if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) + dbgs() << " " + << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + }); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp new file mode 100644 index 0000000..b29ea19 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -0,0 +1,1844 @@ +//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register coalescing pass that attempts to +// aggressively coalesce every register copy that it can. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regcoalescing" +#include "SimpleRegisterCoalescing.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); +STATISTIC(numDeadValNo, "Number of valno def marked dead"); + +char SimpleRegisterCoalescing::ID = 0; +static cl::opt<bool> +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt<bool> +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +DisablePhysicalJoin("disable-physical-join", + cl::desc("Avoid coalescing physical register copies"), + cl::init(false), cl::Hidden); + +INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer, + 
"simple-register-coalescing", "Simple Register Coalescing", + false, false, true); + +char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; + +void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + if (StrongPHIElim) + AU.addPreservedID(StrongPHIEliminationID); + else + AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// being the source and IntB being the dest, thus this defines a value number +/// in IntB. If the source value number (in IntA) is defined by a copy from B, +/// see if we can merge these two pieces of B into a single value number, +/// eliminating a copy. For example: +/// +/// A3 = B0 +/// ... +/// B1 = A3 <- this copy +/// +/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 +/// value number to be replaced with B0 (which simplifies the B liveinterval). +/// +/// This returns true if an interval was modified. +/// +bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // the example above. + LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); + if (BLR == IntB.end()) return false; + VNInfo *BValNo = BLR->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (!BValNo->getCopy()) return false; + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; + VNInfo *AValNo = ALR->valno; + // If it's re-defined by an early clobber somewhere in the live range, then + // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. + // See PR3149: + // 172 %ECX<def> = MOV32rr %reg1039<kill> + // 180 INLINEASM <es:subl $5,$1 + // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, + // %EAX<kill>, + // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 + // 188 %EAX<def> = MOV32rr %EAX<kill> + // 196 %ECX<def> = MOV32rr %ECX<kill> + // 204 %ECX<def> = MOV32rr %ECX<kill> + // 212 %EAX<def> = MOV32rr %EAX<kill> + // 220 %EAX<def> = MOV32rr %EAX + // 228 %reg1039<def> = MOV32rr %ECX<kill> + // The early clobber operand ties ECX input to the ECX def. 
+ // + // The live interval of ECX is represented as this: + // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) + // The coalescer has no idea there was a def in the middle of [174,230]. + if (AValNo->hasRedefByEC()) + return false; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + if (!CP.isCoalescable(AValNo->getCopy())) + return false; + + // Get the LiveRange in IntB that this value number starts with. + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); + if (ValLR == IntB.end()) + return false; + + // Make sure that the end of the live range is inside the same block as + // CopyMI. + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValLR ends in the same block that the CopyMI + // live-range starts. If there are no intervening live ranges between them in + // IntB, we can merge them. + if (ValLR+1 != BLR) return false; + + // If a live interval is a physical register, conservatively check if any + // of its sub-registers is overlapping the live interval of the virtual + // register. If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && + *tri_->getSubRegisters(IntB.reg)) { + for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) + if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + DEBUG({ + dbgs() << "\t\tInterfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); + }); + return false; + } + } + + DEBUG({ + dbgs() << "Extending: "; + IntB.print(dbgs(), tri_); + }); + + SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + BValNo->setCopy(0); + + // Okay, we can merge them. We need to insert a new liverange: + // [ValLR.end, BLR.begin) of either value number, then we merge the + // two value numbers. + IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + + // If the IntB live range is assigned to a physical register, and if that + // physreg has sub-registers, update their live intervals as well. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &SRLI = li_->getInterval(*SR); + SRLI.addRange(LiveRange(FillerStart, FillerEnd, + SRLI.getNextValue(FillerStart, 0, true, + li_->getVNInfoAllocator()))); + } + } + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValLR->valno) { + IntB.MergeValueNumberInto(BValNo, ValLR->valno); + } + DEBUG({ + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValLREndInst->getOperand(UIdx).setIsKill(false); + } + + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. 
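
Reduced to plain integer ranges, the transform AdjustCopiesBackFrom has just performed is: insert the filler [ValLR.end, BLR.start) into B and give both pieces one value number, so the copy becomes redundant. A standalone sketch under that simplification (all names invented; SlotIndex reduced to int):

// Illustrative sketch only (not from the LLVM sources): extend B across
// the gap left by the copy, then merge the two value numbers.
#include <cassert>
#include <vector>

struct Range { int start, end, valno; };   // half-open [start, end)

static void mergeAcrossCopy(std::vector<Range> &B, int valLR, int bLR) {
  assert(B[valLR].end <= B[bLR].start && "ranges must be adjacent");
  // Filler range [ValLR.end, BLR.start), then merge value numbers.
  B.insert(B.begin() + bLR, {B[valLR].end, B[bLR].start, B[valLR].valno});
  B[bLR + 1].valno = B[valLR].valno;
}

int main() {
  // B value 0 lives over [0,10); the copy recreates B at 14 as value 1.
  std::vector<Range> B = {{0, 10, 0}, {14, 20, 1}};
  mergeAcrossCopy(B, 0, 1);   // now [0,10)[10,14)[14,20), all value 0
  assert(B.size() == 3 && B[1].valno == 0 && B[2].valno == 0);
}
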
+  if (ALR->end == CopyIdx)
+    TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR);
+
+  ++numExtends;
+  return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValno val# of IntA.
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+                                                    LiveInterval &IntB,
+                                                    VNInfo *AValNo,
+                                                    VNInfo *BValNo) {
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    LiveInterval::Ranges::iterator BI =
+      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+    if (BI != IntB.ranges.begin())
+      --BI;
+    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+      if (BI->valno == BValNo)
+        continue;
+      // When BValNo is null, we're looking for a dummy clobber-value for
+      // a subreg.
+      if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy())
+        continue;
+      if (BI->start <= AI->start && BI->end > AI->start)
+        return true;
+      if (BI->start > AI->start && BI->start < AI->end)
+        return true;
+    }
+  }
+  return false;
+}
+
+static void
+TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
+  for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+       i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isImplicit())
+      NewMI->addOperand(MO);
+  }
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+///  A3 = op A2 B0<kill>
+///    ...
+///  B1 = A3      <- this copy
+///    ...
+///     = op A3   <- more uses
+///
+/// ==>
+///
+///  B2 = op B0 A2<kill>
+///    ...
+///  B1 = B2      <- now an identity copy
+///    ...
+///     = op B2   <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+                                                        MachineInstr *CopyMI) {
+  // FIXME: For now, only eliminate the copy by commuting its def when the
+  // source register is a virtual register. We want to guard against cases
+  // where the copy is a back edge copy and commuting the def lengthens the
+  // live interval of the source register to the entire loop.
+  if (CP.isPhys() && CP.isFlipped())
+    return false;
+
+  // Bail if there is no dst interval.
+  if (!li_->hasInterval(CP.getDstReg()))
+    return false;
+
+  SlotIndex CopyIdx =
+    li_->getInstructionIndex(CopyMI).getDefIndex();
+
+  LiveInterval &IntA =
+    li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+    li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  if (BLR == IntB.end()) return false;
+  VNInfo *BValNo = BLR->valno;
+
+  // Get the location that B is defined at. Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx. If unknown, we
+  // can't process it.
+  if (!BValNo->getCopy()) return false;
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
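
The operand bookkeeping that follows is easy to get lost in; the kernel is just choosing which commuted operand becomes the new destination. A sketch of that selection with invented names, plain unsigned indices standing in for machine operands:

// Illustrative sketch only (not from the LLVM sources): mirror of the
// Op1/Op2/UseOpIdx selection below. Given a commutable two-address def
// "A3 = op A2 B0" where the def is tied to A2, commuting yields
// "B2 = op B0 A2" and "B1 = A3" becomes an identity copy.
#include <cstdio>

// Returns the operand index that becomes the new destination, or -1 if
// the tied use is not one of the two commutable operands.
static int pickNewDstIdx(unsigned Op1, unsigned Op2, unsigned UseOpIdx) {
  if (Op1 == UseOpIdx) return static_cast<int>(Op2);
  if (Op2 == UseOpIdx) return static_cast<int>(Op1);
  return -1;
}

int main() {
  // Operands 1 and 2 commute; operand 1 is tied to the def.
  std::printf("new dst operand = %d\n", pickNewDstIdx(1, 2, 1)); // -> 2
}
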
+  LiveInterval::iterator ALR =
+    IntA.FindLiveRangeContaining(CopyIdx.getUseIndex());
+
+  assert(ALR != IntA.end() && "Live range not found!");
+  VNInfo *AValNo = ALR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+  // tested?
+  if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
+      AValNo->isUnused() || AValNo->hasPHIKill())
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+  if (!DefMI)
+    return false;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  if (!TID.isCommutable())
+    return false;
+  // If DefMI is a two-address instruction then commuting it will change the
+  // destination register.
+  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+  assert(DefIdx != -1);
+  unsigned UseOpIdx;
+  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+    return false;
+  unsigned Op1, Op2, NewDstIdx;
+  if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+    return false;
+  if (Op1 == UseOpIdx)
+    NewDstIdx = Op2;
+  else if (Op2 == UseOpIdx)
+    NewDstIdx = Op1;
+  else
+    return false;
+
+  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+  unsigned NewReg = NewDstMO.getReg();
+  if (NewReg != IntB.reg || !NewDstMO.isKill())
+    return false;
+
+  // Make sure there are no other definitions of IntB that would reach the
+  // uses which the new definition can reach.
+  if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+    return false;
+
+  // Abort if the aliases of IntB.reg have values that are not simply the
+  // clobbers from the superreg.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+    for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+      if (li_->hasInterval(*AS) &&
+          HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
+        return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+  // It's not possible to determine whether it's safe to perform the
+  // coalescing.
+  for (MachineRegisterInfo::use_nodbg_iterator UI =
+         mri_->use_nodbg_begin(IntA.reg),
+       UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end())
+      continue;
+    if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+      return false;
+  }
+
+  DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI);
+
+  // At this point we have decided that it is legal to do this
+  // transformation. Start by commuting the instruction.
+  MachineBasicBlock *MBB = DefMI->getParent();
+  MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+  if (!NewMI)
+    return false;
+  if (NewMI != DefMI) {
+    li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+    MBB->insert(DefMI, NewMI);
+    MBB->erase(DefMI);
+  }
+  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+  NewMI->getOperand(OpIdx).setIsKill();
+
+  bool BHasPHIKill = BValNo->hasPHIKill();
+  SmallVector<VNInfo*, 4> BDeadValNos;
+  std::map<SlotIndex, SlotIndex> BExtend;
+
+  // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR,
+  // e.g.
+  // A = or A, B
+  // ...
+  // B = A
+  // ...
+  // C = A<kill>
+  // ...
+  //   = B
+  bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
+  if (Extended)
+    BExtend[ALR->end] = BLR->end;
+
+  // Update uses of IntA of the specific Val# with IntB.
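
The loop below redirects every use of IntA's value to IntB's register. Modeled on plain structs (names invented; substPhysReg/substVirtReg reduced to a field assignment), the rewriting core looks like:

// Illustrative sketch only (not from the LLVM sources): redirect all
// operands naming srcReg to dstReg, composing a subregister index when
// one is in play. All names are invented.
#include <cstdio>
#include <vector>

struct Operand { unsigned reg, subIdx; };

static void rewriteOperands(std::vector<Operand> &ops, unsigned srcReg,
                            unsigned dstReg, unsigned subIdx) {
  for (Operand &op : ops)
    if (op.reg == srcReg) {
      op.reg = dstReg;
      if (subIdx) op.subIdx = subIdx;  // stand-in for subreg composition
    }
}

int main() {
  std::vector<Operand> ops = {{1024, 0}, {1025, 0}, {1024, 0}};
  rewriteOperands(ops, 1024, 1026, 1);
  for (const Operand &op : ops)
    std::printf("%%reg%u:%u\n", op.reg, op.subIdx);
}
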
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+         UE = mri_->use_end(); UI != UE;) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = &*UI;
+    ++UI;
+    if (JoinedCopies.count(UseMI))
+      continue;
+    if (UseMI->isDebugValue()) {
+      // FIXME: These don't have an instruction index. Not clear we have enough
+      // info to decide whether to do this replacement or not. For now do it.
+      UseMO.setReg(NewReg);
+      continue;
+    }
+    SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end() || ULR->valno != AValNo)
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+      UseMO.substPhysReg(NewReg, *tri_);
+    else
+      UseMO.setReg(NewReg);
+    if (UseMI == CopyMI)
+      continue;
+    if (UseMO.isKill()) {
+      if (Extended)
+        UseMO.setIsKill(false);
+    }
+    if (!UseMI->isCopy())
+      continue;
+    if (UseMI->getOperand(0).getReg() != IntB.reg ||
+        UseMI->getOperand(0).getSubReg())
+      continue;
+
+    // This copy will become a noop. If it's defining a new val#,
+    // remove that val# as well. However this live range is being
+    // extended to the end of the existing live range defined by the copy.
+    SlotIndex DefIdx = UseIdx.getDefIndex();
+    const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+    if (!DLR)
+      continue;
+    BHasPHIKill |= DLR->valno->hasPHIKill();
+    assert(DLR->valno->def == DefIdx);
+    BDeadValNos.push_back(DLR->valno);
+    BExtend[DLR->start] = DLR->end;
+    JoinedCopies.insert(UseMI);
+  }
+
+  // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
+  // simply extend BLR if CopyMI doesn't end the range.
+  DEBUG({
+      dbgs() << "Extending: ";
+      IntB.print(dbgs(), tri_);
+    });
+
+  // Remove val#'s defined by copies that will be coalesced away.
+  for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
+    VNInfo *DeadVNI = BDeadValNos[i];
+    if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+      for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) {
+        if (!li_->hasInterval(*AS))
+          continue;
+        LiveInterval &ASLI = li_->getInterval(*AS);
+        if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def))
+          ASLI.removeValNo(ASLR->valno);
+      }
+    }
+    IntB.removeValNo(BDeadValNos[i]);
+  }
+
+  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+  // is updated.
+  VNInfo *ValNo = BValNo;
+  ValNo->def = AValNo->def;
+  ValNo->setCopy(0);
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    SlotIndex End = AI->end;
+    std::map<SlotIndex, SlotIndex>::iterator
+      EI = BExtend.find(End);
+    if (EI != BExtend.end())
+      End = EI->second;
+    IntB.addRange(LiveRange(AI->start, End, ValNo));
+  }
+  ValNo->setHasPHIKill(BHasPHIKill);
+
+  DEBUG({
+      dbgs() << " result = ";
+      IntB.print(dbgs(), tri_);
+      dbgs() << "\nShortening: ";
+      IntA.print(dbgs(), tri_);
+    });
+
+  IntA.removeValNo(AValNo);
+
+  DEBUG({
+      dbgs() << " result = ";
+      IntA.print(dbgs(), tri_);
+      dbgs() << '\n';
+    });
+
+  ++numCommutes;
+  return true;
+}
+
+/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply
+/// falls through to SuccMBB.
+static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
+                                  MachineBasicBlock *SuccMBB,
+                                  const TargetInstrInfo *tii_) {
+  if (MBB == SuccMBB)
+    return true;
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB &&
+    MBB->isSuccessor(SuccMBB);
+}
+
+/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
+/// from a physical register live interval as well as from the live intervals
+/// of its sub-registers.
+static void removeRange(LiveInterval &li,
+                        SlotIndex Start, SlotIndex End,
+                        LiveIntervals *li_, const TargetRegisterInfo *tri_) {
+  li.removeRange(Start, End, true);
+  if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+      if (!li_->hasInterval(*SR))
+        continue;
+      LiveInterval &sli = li_->getInterval(*SR);
+      SlotIndex RemoveStart = Start;
+      SlotIndex RemoveEnd = Start;
+
+      while (RemoveEnd != End) {
+        LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart);
+        if (LR == sli.end())
+          break;
+        RemoveEnd = (LR->end < End) ? LR->end : End;
+        sli.removeRange(RemoveStart, RemoveEnd, true);
+        RemoveStart = RemoveEnd;
+      }
+    }
+  }
+}
+
+/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block
+/// as the copy instruction, trim the live interval to the last use and return
+/// true.
+bool
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
+                                                    MachineBasicBlock *CopyMBB,
+                                                    LiveInterval &li,
+                                                    const LiveRange *LR) {
+  SlotIndex MBBStart = li_->getMBBStartIdx(CopyMBB);
+  SlotIndex LastUseIdx;
+  MachineOperand *LastUse =
+    lastRegisterUse(LR->start, CopyIdx.getPrevSlot(), li.reg, LastUseIdx);
+  if (LastUse) {
+    MachineInstr *LastUseMI = LastUse->getParent();
+    if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
+      // r1024 = op
+      // ...
+      // BB1:
+      //       = r1024
+      //
+      // BB2:
+      // r1025<dead> = r1024<kill>
+      if (MBBStart < LR->end)
+        removeRange(li, MBBStart, LR->end, li_, tri_);
+      return true;
+    }
+
+    // There are uses before the copy, just shorten the live range to the end
+    // of the last use.
+    LastUse->setIsKill();
+    removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_);
+    if (LastUseMI->isCopy()) {
+      MachineOperand &DefMO = LastUseMI->getOperand(0);
+      if (DefMO.getReg() == li.reg && !DefMO.getSubReg())
+        DefMO.setIsDead();
+    }
+    return true;
+  }
+
+  // Is it livein?
+  if (LR->start <= MBBStart && LR->end > MBBStart) {
+    if (LR->start == li_->getZeroIndex()) {
+      assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+      // Live-in to the function but dead. Remove it from entry live-in set.
+      mf_->begin()->removeLiveIn(li.reg);
+    }
+    // FIXME: Shorten intervals in BBs that reach this BB.
+  }
+
+  return false;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+                                                       unsigned DstReg,
+                                                       unsigned DstSubIdx,
+                                                       MachineInstr *CopyMI) {
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+  assert(SrcLR != SrcInt.end() && "Live range not found!");
+  VNInfo *ValNo = SrcLR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+  // tested?
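
Before rewriting anything, ReMaterializeTrivialDef applies a chain of cheap gating tests, visible at the top of the function body that follows. A sketch of that gate as a pure predicate, with bools standing in for the TargetInstrDesc and LiveIntervals queries (all names invented):

// Illustrative sketch only (not from the LLVM sources): the
// rematerialization gate as a predicate over precomputed properties.
#include <cstdio>

struct DefProperties {
  bool isPHIDef, isDefAccurate, isUnused, hasPHIKill;
  bool asCheapAsAMove, triviallyReMat, safeToMove;
  unsigned numDefs;
};

static bool canRematerialize(const DefProperties &d) {
  if (d.isPHIDef || !d.isDefAccurate || d.isUnused || d.hasPHIKill)
    return false;                   // the value's origin must be a known def
  return d.asCheapAsAMove && d.triviallyReMat && d.safeToMove &&
         d.numDefs == 1;            // exactly one def, cheap and movable
}

int main() {
  DefProperties d{false, true, false, false, true, true, true, 1};
  std::printf("remat ok: %d\n", canRematerialize(d));   // -> 1
}
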
+  if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
+      ValNo->isUnused() || ValNo->hasPHIKill())
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+  assert(DefMI && "Defining instruction disappeared");
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  if (!TID.isAsCheapAsAMove())
+    return false;
+  if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+    return false;
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(tii_, AA, SawStore))
+    return false;
+  if (TID.getNumDefs() != 1)
+    return false;
+  if (!DefMI->isImplicitDef()) {
+    // Make sure the copy destination register class fits the instruction
+    // definition register class. The mismatch can happen as a result of
+    // earlier extract_subreg, insert_subreg, subreg_to_reg coalescing.
+    const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_);
+    if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+      if (mri_->getRegClass(DstReg) != RC)
+        return false;
+    } else if (!RC->contains(DstReg))
+      return false;
+  }
+
+  // If the destination register has a sub-register index on it, make sure it
+  // matches the instruction register class.
+  if (DstSubIdx) {
+    const TargetInstrDesc &TID = DefMI->getDesc();
+    if (TID.getNumDefs() != 1)
+      return false;
+    const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+    const TargetRegisterClass *DstSubRC =
+      DstRC->getSubRegisterRegClass(DstSubIdx);
+    const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_);
+    if (DefRC == DstRC)
+      DstSubIdx = 0;
+    else if (DefRC != DstSubRC)
+      return false;
+  }
+
+  RemoveCopyFlag(DstReg, CopyMI);
+
+  // If copy kills the source register, find the last use and propagate
+  // kill.
+  bool checkForDeadDef = false;
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  if (SrcLR->end == CopyIdx.getDefIndex())
+    if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
+      checkForDeadDef = true;
+    }
+
+  MachineBasicBlock::iterator MII =
+    llvm::next(MachineBasicBlock::iterator(CopyMI));
+  tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
+  MachineInstr *NewMI = prior(MII);
+
+  if (checkForDeadDef) {
+    // PR4090 fix: Trim interval failed because there was no use of the
+    // source interval in this MBB. If the def is in this MBB too then we
+    // should mark it dead:
+    if (DefMI->getParent() == MBB) {
+      DefMI->addRegisterDead(SrcInt.reg, tri_);
+      SrcLR->end = SrcLR->start.getNextSlot();
+    }
+  }
+
+  // CopyMI may have implicit operands, transfer them over to the newly
+  // rematerialized instruction. And update implicit def interval valnos.
+  for (unsigned i = CopyMI->getDesc().getNumOperands(),
+         e = CopyMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = CopyMI->getOperand(i);
+    if (MO.isReg() && MO.isImplicit())
+      NewMI->addOperand(MO);
+    if (MO.isDef())
+      RemoveCopyFlag(MO.getReg(), CopyMI);
+  }
+
+  TransferImplicitOps(CopyMI, NewMI);
+  li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+  CopyMI->eraseFromParent();
+  ReMatCopies.insert(CopyMI);
+  ReMatDefs.insert(DefMI);
+  DEBUG(dbgs() << "Remat: " << *NewMI);
+  ++NumReMats;
+  return true;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void +SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. + if (DstIsPhys) { + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } + + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); + } + + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. + if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +/// removeIntervalIfEmpty - Check if the live interval of a physical register +/// is empty, if so remove it and also remove the empty intervals of its +/// sub-registers. Return true if live interval is removed. +static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, + const TargetRegisterInfo *tri_) { + if (li.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(li.reg)) + for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &sli = li_->getInterval(*SR); + if (sli.empty()) + li_->removeInterval(*SR); + } + li_->removeInterval(li.reg); + return true; + } + return false; +} + +/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. +/// Return true if live interval is removed. +bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); + LiveInterval::iterator MLR = + li.FindLiveRangeContaining(CopyIdx.getDefIndex()); + if (MLR == li.end()) + return false; // Already removed by ShortenDeadCopySrcLiveRange. + SlotIndex RemoveStart = MLR->start; + SlotIndex RemoveEnd = MLR->end; + SlotIndex DefIdx = CopyIdx.getDefIndex(); + // Remove the liverange that's defined by this. + if (RemoveStart == DefIdx && RemoveEnd == DefIdx.getStoreIndex()) { + removeRange(li, RemoveStart, RemoveEnd, li_, tri_); + return removeIntervalIfEmpty(li, li_, tri_); + } + return false; +} + +/// RemoveDeadDef - If a def of a live interval is now determined dead, remove +/// the val# it defines. 
If the live interval becomes empty, remove it as well.
+bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
+                                             MachineInstr *DefMI) {
+  SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+  LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+  if (DefIdx != MLR->valno->def)
+    return false;
+  li.removeValNo(MLR->valno);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
+                                              const MachineInstr *CopyMI) {
+  SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+  if (li_->hasInterval(DstReg)) {
+    LiveInterval &LI = li_->getInterval(DstReg);
+    if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+      if (LR->valno->getCopy() == CopyMI)
+        LR->valno->setCopy(0);
+  }
+  if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+    return;
+  for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
+    if (!li_->hasInterval(*AS))
+      continue;
+    LiveInterval &LI = li_->getInterval(*AS);
+    if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+      if (LR->valno->getCopy() == CopyMI)
+        LR->valno->setCopy(0);
+  }
+}
+
+/// PropagateDeadness - Propagate the dead marker to the instruction which
+/// defines the val#.
+static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
+                              SlotIndex &LRStart, LiveIntervals *li_,
+                              const TargetRegisterInfo* tri_) {
+  MachineInstr *DefMI =
+    li_->getInstructionFromIndex(LRStart.getDefIndex());
+  if (DefMI && DefMI != CopyMI) {
+    int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg);
+    if (DeadIdx != -1)
+      DefMI->getOperand(DeadIdx).setIsDead();
+    else
+      DefMI->addOperand(MachineOperand::CreateReg(li.reg,
+                   /*def*/true, /*implicit*/true, /*kill*/false, /*dead*/true));
+    LRStart = LRStart.getNextSlot();
+  }
+}
+
+/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+/// extended by a dead copy. Mark the last use (if any) of the val# as a kill,
+/// since it ends the live range there. If there isn't another use, then this
+/// live range is dead. Return true if live interval is removed.
+bool
+SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
+                                                      MachineInstr *CopyMI) {
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI);
+  if (CopyIdx == SlotIndex()) {
+    // FIXME: special case: function live in. It can be a general case if the
+    // first instruction index starts at > 0 value.
+    assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+    // Live-in to the function but dead. Remove it from entry live-in set.
+    if (mf_->begin()->isLiveIn(li.reg))
+      mf_->begin()->removeLiveIn(li.reg);
+    if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx))
+      removeRange(li, LR->start, LR->end, li_, tri_);
+    return removeIntervalIfEmpty(li, li_, tri_);
+  }
+
+  LiveInterval::iterator LR =
+    li.FindLiveRangeContaining(CopyIdx.getPrevIndex().getStoreIndex());
+  if (LR == li.end())
+    // Livein but defined by a phi.
+    return false;
+
+  SlotIndex RemoveStart = LR->start;
+  SlotIndex RemoveEnd = CopyIdx.getStoreIndex();
+  if (LR->end > RemoveEnd)
+    // More uses past this copy? Nothing to do.
+    return false;
+
+  // If there is a last use in the same bb, we can't remove the live range.
+  // Shorten the live interval and return.
+  MachineBasicBlock *CopyMBB = CopyMI->getParent();
+  if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
+    return false;
+
+  // There are other kills of the val#. Nothing to do.
+  if (!li.isOnlyLROfValNo(LR))
+    return false;
+
+  MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
+  if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
+    // If the live range starts in another mbb and the copy mbb is not a fall
+    // through mbb, then we can only cut the range from the beginning of the
+    // copy mbb.
+    RemoveStart = li_->getMBBStartIdx(CopyMBB).getNextIndex().getBaseIndex();
+
+  if (LR->valno->def == RemoveStart) {
+    // If the def MI defines the val# and this copy is the only kill of the
+    // val#, then propagate the dead marker.
+    PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+    ++numDeadValNo;
+  }
+
+  removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
+                                                unsigned DstReg,
+                                             const TargetRegisterClass *SrcRC,
+                                             const TargetRegisterClass *DstRC,
+                                             const TargetRegisterClass *NewRC) {
+  unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
+  // This heuristic is good enough in practice, but it's obviously not *right*.
+  // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+  // out all but the most restrictive register classes.
+  if (NewRCCount > 4 ||
+      // Early exit if the function is fairly small; coalesce aggressively in
+      // that case. For really special register classes with 3 or
+      // fewer registers, be a bit more careful.
+      (li_->getFuncInstructionCount() / NewRCCount) < 8)
+    return true;
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+  unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+  if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
+    return true;
+  // Estimate *register use density*. If it doubles or more, abort.
+  unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+                                   mri_->use_nodbg_end());
+  unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+                                   mri_->use_nodbg_end());
+  unsigned NewUses = SrcUses + DstUses;
+  unsigned NewSize = SrcSize + DstSize;
+  if (SrcRC != NewRC && SrcSize > NewRCCount) {
+    unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
+    if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+      return false;
+  }
+  if (DstRC != NewRC && DstSize > NewRCCount) {
+    unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
+    if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
+      return false;
+  }
+  return true;
+}
+
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
+  MachineInstr *CopyMI = TheCopy.MI;
+
+  Again = false;
+  if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+    return false; // Already done.
+
+  DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+  CoalescerPair CP(*tii_, *tri_);
+  if (!CP.setRegisters(CopyMI)) {
+    DEBUG(dbgs() << "\tNot coalescable.\n");
+    return false;
+  }
+
+  // If they are already joined we continue.
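
The density comparison in isWinToJoinCrossClass above is plain integer arithmetic; a standalone sketch (names invented, products widened to 64 bits to avoid overflow):

// Illustrative sketch only (not from the LLVM sources): reject a
// cross-class join when estimated register use density, weighted by
// register-class size, would at least double.
#include <cstdio>

static bool densityDoubles(unsigned srcUses, unsigned dstUses,
                           unsigned srcSize, unsigned dstSize,
                           unsigned srcRCCount, unsigned newRCCount) {
  unsigned newUses = srcUses + dstUses;
  unsigned newSize = srcSize + dstSize;
  // Same shape as: NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount
  return (unsigned long long)newUses * srcSize * srcRCCount >
         2ULL * srcUses * newSize * newRCCount;
}

int main() {
  // 10 uses over 100 instructions joining with 40 uses over 20 instructions:
  // 50*100*8 = 40000 > 2*10*120*4 = 9600, so the join is rejected.
  std::printf("reject: %d\n", densityDoubles(10, 40, 100, 20, 8, 4));
}
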
+ if (CP.getSrcReg() == CP.getDstReg()) { + DEBUG(dbgs() << "\tCopy already coalesced.\n"); + return false; // Not coalescable. + } + + if (DisablePhysicalJoin && CP.isPhys()) { + DEBUG(dbgs() << "\tPhysical joins disabled.\n"); + return false; + } + + DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); + + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); + // Only coalesce to allocatable physreg. + if (!li_->isAllocatable(CP.getDstReg())) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + } else { + DEBUG({ + dbgs() << " with reg%" << CP.getDstReg(); + if (CP.getSubIdx()) + dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx()); + dbgs() << " to " << CP.getNewRC()->getName() << "\n"; + }); + + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); + return false; + } + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << CP.getNewRC()->getName() << ".\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // We need to be careful about coalescing a source physical register with a + // virtual register. Once the coalescing is done, it cannot be broken and + // these are not spillable! If the destination interval uses are far away, + // think twice about coalescing them! + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial() && CP.isPhys()) { + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold && + std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), + mri_->use_nodbg_end()) * Threshold < Length) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) + return true; + + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (!JoinIntervals(CP)) { + // Coalescing failed. 
+
+    // If the definition of the source is a trivial computation, try
+    // rematerializing it.
+    if (!CP.isFlipped() &&
+        ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+                                CP.getDstReg(), 0, CopyMI))
+      return true;
+
+    // If we can eliminate the copy without merging the live ranges, do so now.
+    if (!CP.isPartial()) {
+      if (AdjustCopiesBackFrom(CP, CopyMI) ||
+          RemoveCopyByCommutingDef(CP, CopyMI)) {
+        JoinedCopies.insert(CopyMI);
+        DEBUG(dbgs() << "\tTrivial!\n");
+        return true;
+      }
+    }
+
+    // Otherwise, we are unable to join the intervals.
+    DEBUG(dbgs() << "\tInterference!\n");
+    Again = true; // May be possible to coalesce later.
+    return false;
+  }
+
+  // Coalescing to a virtual register that is of a sub-register class of the
+  // other. Make sure the resulting register is set to the right register
+  // class.
+  if (CP.isCrossClass()) {
+    ++numCrossRCs;
+    mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+  }
+
+  // Remember to delete the copy instruction.
+  JoinedCopies.insert(CopyMI);
+
+  UpdateRegDefsUses(CP);
+
+  // If we have extended the live range of a physical register, make sure we
+  // update live-in lists as well.
+  if (CP.isPhys()) {
+    SmallVector<MachineBasicBlock*, 16> BlockSeq;
+    // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+    // ranges for this, and they are preserved.
+    LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+    for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+         I != E; ++I ) {
+      li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+      for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+        MachineBasicBlock &block = *BlockSeq[idx];
+        if (!block.isLiveIn(CP.getDstReg()))
+          block.addLiveIn(CP.getDstReg());
+      }
+      BlockSeq.clear();
+    }
+  }
+
+  // SrcReg is guaranteed to be the register whose live interval is being
+  // merged.
+  li_->removeInterval(CP.getSrcReg());
+
+  // Update regalloc hint.
+  tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+
+  DEBUG({
+      LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+      dbgs() << "\tJoined. Result = ";
+      DstInt.print(dbgs(), tri_);
+      dbgs() << "\n";
+    });
+
+  ++numJoins;
+  return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that
+/// have already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+                                  SmallVector<VNInfo*, 16> &NewVNInfo,
+                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+                                  SmallVector<int, 16> &ThisValNoAssignments,
+                                  SmallVector<int, 16> &OtherValNoAssignments) {
+  unsigned VN = VNI->id;
+
+  // If the VN has already been computed, just return it.
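
ComputeUltimateVN, continued below, is a memoized recursion over two assignment tables: -1 means unassigned and -2 marks a value currently being resolved, which is the cycle guard behind the assert. Reduced to integer value numbers (all names invented, std::map in place of DenseMap):

// Illustrative sketch only (not from the LLVM sources): resolve the
// ultimate value number of a value that may be a copy of a copy across
// two intervals.
#include <cassert>
#include <cstdio>
#include <map>
#include <vector>

static int resolve(int vn, std::map<int, int> &thisFromOther,
                   std::map<int, int> &otherFromThis,
                   std::vector<int> &thisAssign,
                   std::vector<int> &otherAssign, int &nextUltimate) {
  if (thisAssign[vn] >= 0) return thisAssign[vn];  // already computed
  assert(thisAssign[vn] != -2 && "cyclic value numbers");
  auto it = thisFromOther.find(vn);
  if (it == thisFromOther.end())          // not a copy: fresh number
    return thisAssign[vn] = nextUltimate++;
  if (otherAssign[it->second] >= 0)       // other side already resolved
    return thisAssign[vn] = otherAssign[it->second];
  thisAssign[vn] = -2;                    // mark in-progress, then recurse
  return thisAssign[vn] = resolve(it->second, otherFromThis, thisFromOther,
                                  otherAssign, thisAssign, nextUltimate);
}

int main() {
  // LHS value 0 is a copy of RHS value 0; both collapse to one number.
  std::map<int, int> lhsFromRhs = {{0, 0}}, rhsFromLhs;
  std::vector<int> lhs(1, -1), rhs(1, -1);
  int next = 0;
  std::printf("ultimate VN = %d\n",
              resolve(0, lhsFromRhs, rhsFromLhs, lhs, rhs, next));
}
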
+  if (ThisValNoAssignments[VN] >= 0)
+    return ThisValNoAssignments[VN];
+  assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+  // If this val is not a copy from the other val, then it must be a new value
+  // number in the destination.
+  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+  if (I == ThisFromOther.end()) {
+    NewVNInfo.push_back(VNI);
+    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+  }
+  VNInfo *OtherValNo = I->second;
+
+  // Otherwise, this *is* a copy from the RHS. If the other side has already
+  // been computed, return it.
+  if (OtherValNoAssignments[OtherValNo->id] >= 0)
+    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+  // Mark this value number as currently being computed, then ask what the
+  // ultimate value # of the other value is.
+  ThisValNoAssignments[VN] = -2;
+  unsigned UltimateVN =
+    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+                      OtherValNoAssignments, ThisValNoAssignments);
+  return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
+  LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+  DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+  // If a live interval is a physical register, check for interference with any
+  // aliases. The interference check implemented here is a bit more conservative
+  // than the full interference check below. We allow overlapping live ranges
+  // only when one is a copy of the other.
+  if (CP.isPhys()) {
+    for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+      if (!li_->hasInterval(*AS))
+        continue;
+      const LiveInterval &LHS = li_->getInterval(*AS);
+      LiveInterval::const_iterator LI = LHS.begin();
+      for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+           RI != RE; ++RI) {
+        LI = std::lower_bound(LI, LHS.end(), RI->start);
+        // Does LHS have an overlapping live range starting before RI?
+        if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+            (RI->start != RI->valno->def ||
+             !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+          DEBUG({
+            dbgs() << "\t\tInterference from alias: ";
+            LHS.print(dbgs(), tri_);
+            dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+          });
+          return false;
+        }
+
+        // Check that LHS ranges beginning in this range are copies.
+        for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+          if (LI->start != LI->valno->def ||
+              !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+            DEBUG({
+              dbgs() << "\t\tInterference from alias: ";
+              LHS.print(dbgs(), tri_);
+              dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+            });
+            return false;
+          }
+        }
+      }
+    }
+  }
+
+  // Compute the final value assignment, assuming that the live ranges can be
+  // coalesced.
+  SmallVector<int, 16> LHSValNoAssignments;
+  SmallVector<int, 16> RHSValNoAssignments;
+  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+  SmallVector<VNInfo*, 16> NewVNInfo;
+
+  LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+  DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+  // Loop over the value numbers of the LHS, seeing if any are defined from
+  // the RHS.
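+  // (Illustrative case, using hypothetical registers: if an LHS value was
+  // defined by "%reg1025 = COPY %reg1027" and %reg1027 lives in the RHS,
+  // that LHS value can share the RHS value number rather than getting a
+  // fresh one.)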
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; + + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + LHSValsDefinedFromRHS[VNI] = lr->valno; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; + + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + RHSValsDefinedFromLHS[VNI] = lr->valno; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + } + + while (I != IE && J != JE) { + // Determine if these two live ranges overlap. + bool Overlaps; + if (I->start < J->start) { + Overlaps = I->end > J->start; + } else { + Overlaps = J->end > I->start; + } + + // If so, check value # info to determine if they are really different. 
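+    // (For example, hypothetically: if LHS range [4,12) and RHS range [8,16)
+    // overlap but their value numbers map to the same NewVNInfo entry, the
+    // overlap is harmless; if they map to different entries, the intervals
+    // conflict and cannot be joined.)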
+    if (Overlaps) {
+      // If the live range overlap will map to the same value number in the
+      // result liverange, we can still coalesce them. If not, we can't.
+      if (LHSValNoAssignments[I->valno->id] !=
+          RHSValNoAssignments[J->valno->id])
+        return false;
+      // If it's re-defined by an early clobber somewhere in the live range,
+      // then conservatively abort coalescing.
+      if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+        return false;
+    }
+
+    if (I->end < J->end)
+      ++I;
+    else
+      ++J;
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned LHSValID = LHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[LHSValID]->setHasPHIKill(true);
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[RHSValID]->setHasPHIKill(true);
+  }
+
+  if (LHSValNoAssignments.empty())
+    LHSValNoAssignments.push_back(-1);
+  if (RHSValNoAssignments.empty())
+    RHSValNoAssignments.push_back(-1);
+
+  // If we get here, we know that we can coalesce the live ranges. Ask the
+  // intervals to coalesce themselves now.
+  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+           mri_);
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+  // depth of the basic block (the unsigned), and then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      // Deeper loops first
+      if (LHS.first != RHS.first)
+        return LHS.first > RHS.first;
+
+      // Prefer blocks that are more connected in the CFG. This takes care of
+      // the most difficult copies first while intervals are short.
+      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+      if (cl != cr)
+        return cl > cr;
+
+      // As a last resort, sort by block number.
+      return LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                               std::vector<CopyRec> &TryAgain) {
+  DEBUG(dbgs() << MBB->getName() << ":\n");
+
+  std::vector<CopyRec> VirtCopies;
+  std::vector<CopyRec> PhysCopies;
+  std::vector<CopyRec> ImpDefCopies;
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E;) {
+    MachineInstr *Inst = MII++;
+
+    // If this is neither a copy nor a subreg_to_reg, we can't join intervals.
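+    // (COPY carries its source register in operand 1; SUBREG_TO_REG carries
+    // it in operand 2, operand 1 being an immediate.)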
+    unsigned SrcReg, DstReg;
+    if (Inst->isCopy()) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(1).getReg();
+    } else if (Inst->isSubregToReg()) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(2).getReg();
+    } else
+      continue;
+
+    bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+    bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+      ImpDefCopies.push_back(CopyRec(Inst, 0));
+    else if (SrcIsPhys || DstIsPhys)
+      PhysCopies.push_back(CopyRec(Inst, 0));
+    else
+      VirtCopies.push_back(CopyRec(Inst, 0));
+  }
+
+  // Try coalescing implicit copies and insert_subreg <undef> first,
+  // followed by copies to / from physical registers, then finally copies
+  // from virtual registers to virtual registers.
+  for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = ImpDefCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = PhysCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = VirtCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+  std::vector<CopyRec> TryAgainList;
+  if (loopInfo->empty()) {
+    // If there are no loops in the function, join intervals in function order.
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      CopyCoalesceInMBB(I, TryAgainList);
+  } else {
+    // Otherwise, join intervals in inner loops before other intervals.
+    // Unfortunately we can't just iterate over loop hierarchy here because
+    // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+    // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+      MachineBasicBlock *MBB = I;
+      MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+    }
+
+    // Sort by loop depth.
+    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+    // Finally, join intervals in loop nest order.
+    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+      CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+  }
+
+  // Joining intervals can allow other intervals to be joined. Iteratively join
+  // until we make no progress.
+  bool ProgressMade = true;
+  while (ProgressMade) {
+    ProgressMade = false;
+
+    for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+      CopyRec &TheCopy = TryAgainList[i];
+      if (!TheCopy.MI)
+        continue;
+
+      bool Again = false;
+      bool Success = JoinCopy(TheCopy, Again);
+      if (Success || !Again) {
+        TheCopy.MI = 0;   // Mark this one as done.
+        ProgressMade = true;
+      }
+    }
+  }
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes. The registers may be either phys or virt regs.
+bool
+SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+                                                   unsigned RegB) const {
+  // Get the register classes for the first reg.
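+  // (A physreg is considered to differ from a virtual register unless the
+  // virtual register's class contains it; two virtual registers differ
+  // exactly when their register classes do.)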
+  if (TargetRegisterInfo::isPhysicalRegister(RegA)) {
+    assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+           "Shouldn't consider two physregs!");
+    return !mri_->getRegClass(RegB)->contains(RegA);
+  }
+
+  // Compare against the regclass for the second reg.
+  const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA);
+  if (TargetRegisterInfo::isVirtualRegister(RegB)) {
+    const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB);
+    return RegClassA != RegClassB;
+  }
+  return !RegClassA->contains(RegB);
+}
+
+/// lastRegisterUse - Returns the last (non-debug) use of the specified
+/// register between cycles Start and End or NULL if there are no uses.
+MachineOperand *
+SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
+                                          SlotIndex End,
+                                          unsigned Reg,
+                                          SlotIndex &UseIdx) const {
+  UseIdx = SlotIndex();
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    MachineOperand *LastUse = NULL;
+    for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg),
+           E = mri_->use_nodbg_end(); I != E; ++I) {
+      MachineOperand &Use = I.getOperand();
+      MachineInstr *UseMI = Use.getParent();
+      if (UseMI->isIdentityCopy())
+        continue;
+      SlotIndex Idx = li_->getInstructionIndex(UseMI);
+      // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something
+      // that compares higher than any other interval.
+      if (Idx >= Start && Idx < End && Idx >= UseIdx) {
+        LastUse = &Use;
+        UseIdx = Idx.getUseIndex();
+      }
+    }
+    return LastUse;
+  }
+
+  SlotIndex s = Start;
+  SlotIndex e = End.getPrevSlot().getBaseIndex();
+  while (e >= s) {
+    // Skip deleted instructions.
+    MachineInstr *MI = li_->getInstructionFromIndex(e);
+    while (e != SlotIndex() && e.getPrevIndex() >= s && !MI) {
+      e = e.getPrevIndex();
+      MI = li_->getInstructionFromIndex(e);
+    }
+    if (e < s || MI == NULL)
+      return NULL;
+
+    // Ignore identity copies.
+    if (!MI->isIdentityCopy())
+      for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+        MachineOperand &Use = MI->getOperand(i);
+        if (Use.isReg() && Use.isUse() && Use.getReg() &&
+            tri_->regsOverlap(Use.getReg(), Reg)) {
+          UseIdx = e.getUseIndex();
+          return &Use;
+        }
+      }
+
+    e = e.getPrevIndex();
+  }
+
+  return NULL;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+  JoinedCopies.clear();
+  ReMatCopies.clear();
+  ReMatDefs.clear();
+}
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &fn.getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  li_ = &getAnalysis<LiveIntervals>();
+  AA = &getAnalysis<AliasAnalysis>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+               << "********** Function: "
+               << ((Value*)mf_->getFunction())->getName() << '\n');
+
+  for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
+         E = tri_->regclass_end(); I != E; ++I)
+    allocatableRCRegs_.insert(std::make_pair(*I,
+                                             tri_->getAllocatableSet(fn, *I)));
+
+  // Join (coalesce) intervals if requested.
+  if (EnableJoining) {
+    joinIntervals();
+    DEBUG({
+        dbgs() << "********** INTERVALS POST JOINING **********\n";
+        for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+             I != E; ++I){
+          I->second->print(dbgs(), tri_);
+          dbgs() << "\n";
+        }
+      });
+  }
+
+  // Perform a final pass over the instructions and compute spill weights
+  // and remove identity moves.
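+  // (Concretely, the loop below erases coalesced copies or downgrades them
+  // to KILLs, deletes rematerialized definitions that have become dead,
+  // strips identity moves, and clears kill flags made stale by joining.)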
+  SmallVector<unsigned, 4> DeadDefs;
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    MachineBasicBlock* mbb = mbbi;
+    for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+         mii != mie; ) {
+      MachineInstr *MI = mii;
+      if (JoinedCopies.count(MI)) {
+        // Delete all coalesced copies.
+        bool DoDelete = true;
+        assert(MI->isCopyLike() && "Unrecognized copy instruction");
+        unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+        if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+            MI->getNumOperands() > 2)
+          // Do not delete extract_subreg, insert_subreg of physical
+          // registers unless the definition is dead. e.g.
+          // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+          // or else the scavenger may complain. LowerSubregs will
+          // delete them later.
+          DoDelete = false;
+
+        if (MI->allDefsAreDead()) {
+          LiveInterval &li = li_->getInterval(SrcReg);
+          if (!ShortenDeadCopySrcLiveRange(li, MI))
+            ShortenDeadCopyLiveRange(li, MI);
+          DoDelete = true;
+        }
+        if (!DoDelete) {
+          // We need the instruction to adjust liveness, so make it a KILL.
+          if (MI->isSubregToReg()) {
+            MI->RemoveOperand(3);
+            MI->RemoveOperand(1);
+          }
+          MI->setDesc(tii_->get(TargetOpcode::KILL));
+          mii = llvm::next(mii);
+        } else {
+          li_->RemoveMachineInstrFromMaps(MI);
+          mii = mbbi->erase(mii);
+          ++numPeep;
+        }
+        continue;
+      }
+
+      // Now check if this is a remat'ed def instruction which is now dead.
+      if (ReMatDefs.count(MI)) {
+        bool isDead = true;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+          const MachineOperand &MO = MI->getOperand(i);
+          if (!MO.isReg())
+            continue;
+          unsigned Reg = MO.getReg();
+          if (!Reg)
+            continue;
+          if (TargetRegisterInfo::isVirtualRegister(Reg))
+            DeadDefs.push_back(Reg);
+          if (MO.isDead())
+            continue;
+          if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+              !mri_->use_nodbg_empty(Reg)) {
+            isDead = false;
+            break;
+          }
+        }
+        if (isDead) {
+          while (!DeadDefs.empty()) {
+            unsigned DeadDef = DeadDefs.back();
+            DeadDefs.pop_back();
+            RemoveDeadDef(li_->getInterval(DeadDef), MI);
+          }
+          li_->RemoveMachineInstrFromMaps(mii);
+          mii = mbbi->erase(mii);
+          continue;
+        } else
+          DeadDefs.clear();
+      }
+
+      // If the move will be an identity move, delete it.
+      if (MI->isIdentityCopy()) {
+        unsigned SrcReg = MI->getOperand(1).getReg();
+        if (li_->hasInterval(SrcReg)) {
+          LiveInterval &RegInt = li_->getInterval(SrcReg);
+          // If def of this move instruction is dead, remove its live range
+          // from the destination register's live interval.
+          if (MI->allDefsAreDead()) {
+            if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
+              ShortenDeadCopyLiveRange(RegInt, MI);
+          }
+        }
+        li_->RemoveMachineInstrFromMaps(MI);
+        mii = mbbi->erase(mii);
+        ++numPeep;
+        continue;
+      }
+
+      ++mii;
+
+      // Check for kill flags that are now unnecessary.
+      if (li_->isNotInMIMap(MI)) continue;
+      SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.isKill()) continue;
+        unsigned reg = MO.getReg();
+        if (!reg || !li_->hasInterval(reg)) continue;
+        if (!li_->getInterval(reg).killedAt(DefIdx))
+          MO.setIsKill(false);
+      }
+    }
+  }
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const {
+   li_->print(O, m);
+}
+
+RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
+  return new SimpleRegisterCoalescing();
+}
+
+// Make sure that anything that uses RegisterCoalescer pulls in this file...
+DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h
new file mode 100644
index 0000000..855bdb9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -0,0 +1,188 @@
+//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register copy coalescing phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+  class SimpleRegisterCoalescing;
+  class LiveVariables;
+  class TargetRegisterInfo;
+  class TargetInstrInfo;
+  class VirtRegMap;
+  class MachineLoopInfo;
+
+  /// CopyRec - Representation for copy instructions in coalescer queue.
+  ///
+  struct CopyRec {
+    MachineInstr *MI;
+    unsigned LoopDepth;
+    CopyRec(MachineInstr *mi, unsigned depth)
+      : MI(mi), LoopDepth(depth) {}
+  };
+
+  class SimpleRegisterCoalescing : public MachineFunctionPass,
+                                   public RegisterCoalescer {
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    LiveIntervals *li_;
+    const MachineLoopInfo* loopInfo;
+    AliasAnalysis *AA;
+
+    DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+    /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+    ///
+    SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+    /// ReMatCopies - Keep track of copies eliminated due to remat.
+    ///
+    SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+    /// ReMatDefs - Keep track of definition instructions which have
+    /// been remat'ed.
+    SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    SimpleRegisterCoalescing() : MachineFunctionPass(ID) {}
+
+    struct InstrSlots {
+      enum {
+        LOAD  = 0,
+        USE   = 1,
+        DEF   = 2,
+        STORE = 3,
+        NUM   = 4
+      };
+    };
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+      // This runs as an independent pass, so don't do anything.
+      return false;
+    }
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* = 0) const;
+
+  private:
+    /// joinIntervals - join compatible live intervals
+    void joinIntervals();
+
+    /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+    /// copies that cannot yet be coalesced into the "TryAgain" list.
+    void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                           std::vector<CopyRec> &TryAgain);
+
+    /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+    /// which are the src/dst of the copy instruction CopyMI. This returns true
+    /// if the copy was successfully coalesced away. If it is not currently
+    /// possible to coalesce this interval, but it may be possible if other
+    /// things get coalesced, then it returns true by reference in 'Again'.
+    bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+    /// JoinIntervals - Attempt to join these two intervals. On failure, this
+    /// returns false. The output "SrcInt" will not have been modified, so we
+    /// can use this information below to update aliases.
+    bool JoinIntervals(CoalescerPair &CP);
+
+    /// Return true if the two specified registers belong to different register
+    /// classes. The registers may be either phys or virt regs.
+    bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+    /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+    /// the source value number is defined by a copy from the destination reg,
+    /// see if we can merge these two destination reg valno# into a single
+    /// value number, eliminating a copy.
+    bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+    /// HasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValno val# of IntA.
+    bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+                              VNInfo *AValNo, VNInfo *BValNo);
+
+    /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+    /// If the source value number is defined by a commutable instruction and
+    /// its other operand is coalesced to the copy dest register, see if we
+    /// can transform the copy into a noop by commuting the definition.
+    bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+    /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+    /// block as the copy instruction, trim the live interval to the last use
+    /// and return true.
+    bool TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
+                                   MachineBasicBlock *CopyMBB,
+                                   LiveInterval &li, const LiveRange *LR);
+
+    /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+    /// trivial computation, replace the copy by rematerializing the
+    /// definition.
+    bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+                                 unsigned DstSubIdx, MachineInstr *CopyMI);
+
+    /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+    /// two virtual registers from different register classes.
+    bool isWinToJoinCrossClass(unsigned SrcReg,
+                               unsigned DstReg,
+                               const TargetRegisterClass *SrcRC,
+                               const TargetRegisterClass *DstRC,
+                               const TargetRegisterClass *NewRC);
+
+    /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+    /// update the subregister number if it is not zero. If DstReg is a
+    /// physical register and the existing subregister number of the def / use
+    /// being updated is not zero, make sure to set it to the correct physical
+    /// subregister.
+    void UpdateRegDefsUses(const CoalescerPair &CP);
+
+    /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+    /// Return true if live interval is removed.
+    bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+    /// extended by a dead copy. Mark the last use (if any) of the val# as
+    /// kill and end the live range there. If there isn't another use, then
+    /// this live range is dead. Return true if live interval is removed.
+    bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// RemoveDeadDef - If a def of a live interval is now determined dead,
+    /// remove the val# it defines. If the live interval becomes empty, remove
+    /// it as well.
+    bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+    /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+    /// VNInfo copy flag for DstReg and all aliases.
+    void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
+    /// lastRegisterUse - Returns the last use of the specified register
+    /// between cycles Start and End or NULL if there are no uses.
+    MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
+                                    unsigned Reg, SlotIndex &LastUseIdx) const;
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 0000000..b637980
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,582 @@
+//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+  class SjLjEHPass : public FunctionPass {
+
+    const TargetLowering *TLI;
+
+    const Type *FunctionContextTy;
+    Constant *RegisterFn;
+    Constant *UnregisterFn;
+    Constant *BuiltinSetjmpFn;
+    Constant *FrameAddrFn;
+    Constant *StackAddrFn;
+    Constant *StackRestoreFn;
+    Constant *LSDAAddrFn;
+    Value *PersonalityFn;
+    Constant *SelectorFn;
+    Constant *ExceptionFn;
+    Constant *CallSiteFn;
+
+    Value *CallSite;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit SjLjEHPass(const TargetLowering *tli = NULL)
+      : FunctionPass(ID), TLI(tli) { }
+    bool doInitialization(Module &M);
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+    const char *getPassName() const {
+      return "SJLJ Exception Handling preparation";
+    }
+
+  private:
+    void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+    void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
+                            SwitchInst *CatchSwitch);
+    void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+    bool insertSjLjEHSupport(Function &F);
+  };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public interface to the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+  return new SjLjEHPass(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+  // Build the function context structure.
+  // builtin_setjmp uses a five-word jbuf
+  const Type *VoidPtrTy =
+          Type::getInt8PtrTy(M.getContext());
+  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+  FunctionContextTy =
+    StructType::get(M.getContext(),
+                    VoidPtrTy,                        // __prev
+                    Int32Ty,                          // call_site
+                    ArrayType::get(Int32Ty, 4),       // __data
+                    VoidPtrTy,                        // __personality
+                    VoidPtrTy,                        // __lsda
+                    ArrayType::get(VoidPtrTy, 5),     // __jbuf
+                    NULL);
+  RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+                                     Type::getVoidTy(M.getContext()),
+                                     PointerType::getUnqual(FunctionContextTy),
+                                     (Type *)0);
+  UnregisterFn =
+    M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+                          Type::getVoidTy(M.getContext()),
+                          PointerType::getUnqual(FunctionContextTy),
+                          (Type *)0);
+  FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+  StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+  StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+  BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+  LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+  SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+  ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+  CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+  PersonalityFn = 0;
+
+  return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context.
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+                                     Value *CallSite) {
+  ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+                                              Number);
+  // Insert a store of the call-site number.
+  new StoreInst(CallSiteNoC, CallSite, true, I);  // volatile
+}
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke.
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
+                                    Value *CallSite,
+                                    SwitchInst *CatchSwitch) {
+  ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                              InvokeNo);
+  // The runtime comes back to the dispatcher with the call_site - 1 in
+  // the context. Odd, but there it is.
+  ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                             InvokeNo - 1);
+
+  // If the unwind edge has phi nodes, split the edge.
+  if (isa<PHINode>(II->getUnwindDest()->begin())) {
+    SplitCriticalEdge(II, 1, this);
+
+    // If there are any phi nodes left, they must have a single predecessor.
+    while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      PN->eraseFromParent();
+    }
+  }
+
+  // Insert the store of the call site value.
+  insertCallSiteStore(II, InvokeNo, CallSite);
+
+  // Record the call site value for the back end so it stays associated with
+  // the invoke.
+  CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
+
+  // Add a switch case to our unwind block.
+  CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+  // We still want this to look like an invoke so we emit the LSDA properly,
+  // so we don't transform the invoke into a call here.
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+  if (!LiveBBs.insert(BB).second) return; // already been here.
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind
+/// edge is spilled into a stack location, guaranteeing that there is nothing
+/// live across the unwind edge. This process also splits all critical edges
+/// coming out of invokes.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
+void SjLjEHPass::
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+  // First step, split all critical edges from invoke instructions.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+    InvokeInst *II = Invokes[i];
+    SplitCriticalEdge(II, 0, this);
+    SplitCriticalEdge(II, 1, this);
+    assert(!isa<PHINode>(II->getNormalDest()) &&
+           !isa<PHINode>(II->getUnwindDest()) &&
+           "critical edge splitting left single entry phi nodes?");
+  }
+
+  Function *F = Invokes.back()->getParent()->getParent();
+
+  // To avoid having to handle incoming arguments specially, we lower each arg
+  // to a copy instruction in the entry block. This ensures that the argument
+  // value itself cannot be live across the entry block.
+  BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+  while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+         isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+    ++AfterAllocaInsertPt;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+       AI != E; ++AI) {
+    const Type *Ty = AI->getType();
+    // Aggregate types can't be cast, but are legal argument types, so we have
+    // to handle them differently. We use an extract/insert pair as a
+    // lightweight method to achieve the same goal.
+    if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+      Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+      NI->insertAfter(EI);
+      AI->replaceAllUsesWith(NI);
+      // Set the operand of the instructions back to the AllocaInst.
+      EI->setOperand(0, AI);
+      NI->setOperand(0, AI);
+    } else {
+      // This is always a no-op cast because we're casting AI to AI->getType()
+      // so src and destination types are identical. BitCast is the only
+      // possibility.
+      CastInst *NC = new BitCastInst(
+        AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+      AI->replaceAllUsesWith(NC);
+      // Set the operand of the cast instruction back to the AllocaInst.
+      // Normally it's forbidden to replace a CastInst's operand because it
+      // could cause the opcode to reflect an illegal conversion. However,
+      // we're replacing it here with the same value it was constructed with.
+      // We do this because the above replaceAllUsesWith() clobbered the
+      // operand, but we want this one to remain.
+      NC->setOperand(0, AI);
+    }
+  }
+
+  // Finally, scan the code looking for instructions with bad live ranges.
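+  // (A "bad" live range, in hypothetical IR terms: a value %v defined before
+  // an invoke and used in its landing pad. The SjLj dispatcher re-enters the
+  // function via longjmp, so %v cannot be assumed to survive in a register
+  // and must be demoted to a stack slot.)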
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*,16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool SjLjEHPass::insertSjLjEHSupport(Function &F) { + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; + + // Look through the terminators of the basic blocks to find invokes, returns + // and unwinds. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + Unwinds.push_back(UI); + } + } + // If we don't have any invokes or unwinds, there's nothing to do. + if (Unwinds.empty() && Invokes.empty()) return false; + + // Find the eh.selector.*, eh.exception and alloca calls. + // + // Remember any allocas() that aren't in the entry block, as the + // jmpbuf saved SP will need to be updated for them. + // + // We'll use the first eh.selector to determine the right personality + // function to use. For SJLJ, we always use the same personality for the + // whole function, not on a per-selector basis. 
+  // FIXME: That's a bit ugly. Better way?
+  SmallVector<CallInst*,16> EH_Selectors;
+  SmallVector<CallInst*,16> EH_Exceptions;
+  SmallVector<Instruction*,16> JmpbufUpdatePoints;
+  // Note: Skip the entry block since there's nothing there that interests
+  // us. eh.selector and eh.exception shouldn't ever be there, and we
+  // want to disregard any allocas that are there.
+  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        if (CI->getCalledFunction() == SelectorFn) {
+          if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
+          EH_Selectors.push_back(CI);
+        } else if (CI->getCalledFunction() == ExceptionFn) {
+          EH_Exceptions.push_back(CI);
+        } else if (CI->getCalledFunction() == StackRestoreFn) {
+          JmpbufUpdatePoints.push_back(CI);
+        }
+      } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+        JmpbufUpdatePoints.push_back(AI);
+      }
+    }
+  }
+  // If we don't have any eh.selector calls, we can't determine the personality
+  // function. Without a personality function, we can't process exceptions.
+  if (!PersonalityFn) return false;
+
+  NumInvokes += Invokes.size();
+  NumUnwinds += Unwinds.size();
+
+  if (!Invokes.empty()) {
+    // We have invokes, so we need to add register/unregister calls to get
+    // this function onto the global unwind stack.
+    //
+    // First thing we need to do is scan the whole function for values that are
+    // live across unwind edges. Each value that is live across an unwind edge
+    // is spilled into a stack location, guaranteeing that there is nothing
+    // live across the unwind edge. This process also splits all critical
+    // edges coming out of invokes.
+    splitLiveRangesAcrossInvokes(Invokes);
+
+    BasicBlock *EntryBB = F.begin();
+    // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+    // that needs to be restored on all exits from the function. This is an
+    // alloca because the value needs to be added to the global context list.
+    unsigned Align = 4; // FIXME: Should be a TLI check?
+    AllocaInst *FunctionContext =
+      new AllocaInst(FunctionContextTy, 0, Align,
+                     "fcn_context", F.begin()->begin());
+
+    Value *Idxs[2];
+    const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+    Value *Zero = ConstantInt::get(Int32Ty, 0);
+    // We also need to keep around a reference to the call_site field.
+    Idxs[0] = Zero;
+    Idxs[1] = ConstantInt::get(Int32Ty, 1);
+    CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                         "call_site",
+                                         EntryBB->getTerminator());
+
+    // The exception selector comes back in context->data[1]
+    Idxs[1] = ConstantInt::get(Int32Ty, 2);
+    Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                              "fc_data",
+                                              EntryBB->getTerminator());
+    Idxs[1] = ConstantInt::get(Int32Ty, 1);
+    Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+                                                    "exc_selector_gep",
+                                                    EntryBB->getTerminator());
+    // The exception value comes back in context->data[0]
+    Idxs[1] = Zero;
+    Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+                                                     "exception_gep",
+                                                     EntryBB->getTerminator());
+
+    // The result of the eh.selector call will be replaced with a
+    // reference to the selector value returned in the function
+    // context. We leave the selector itself so the EH analysis later
+    // can use it.
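+    // (E.g., in hypothetical IR, "%sel = call i32 @llvm.eh.selector(...)"
+    // keeps its call, but each use of %sel is redirected to a volatile load
+    // of context->data[1] inserted just before it.)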
+    for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+      CallInst *I = EH_Selectors[i];
+      Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+      I->replaceAllUsesWith(SelectorVal);
+    }
+    // eh.exception calls are replaced with references to the proper
+    // location in the context. Unlike eh.selector, the eh.exception
+    // calls are removed entirely.
+    for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+      CallInst *I = EH_Exceptions[i];
+      // Possible for there to be duplicates, so check to make sure
+      // the instruction hasn't already been removed.
+      if (!I->getParent()) continue;
+      Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+      const Type *Ty = Type::getInt8PtrTy(F.getContext());
+      Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+      I->replaceAllUsesWith(Val);
+      I->eraseFromParent();
+    }
+
+    // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+    // branch to a dispatch block for non-zero returns. If we return normally,
+    // we're not handling an exception and just register the function context
+    // and continue.
+
+    // Create the dispatch block. The dispatch block is basically a big switch
+    // statement that goes to all of the invoke landing pads.
+    BasicBlock *DispatchBlock =
+            BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+    // Insert a load in the Catch block, and a switch on its value. By default,
+    // we go to a block that just does an unwind (which is the correct action
+    // for a standard call).
+    BasicBlock *UnwindBlock =
+      BasicBlock::Create(F.getContext(), "unwindbb", &F);
+    Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+    Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+                                       DispatchBlock);
+    SwitchInst *DispatchSwitch =
+      SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+                         DispatchBlock);
+    // Split the entry block to insert the conditional branch for the setjmp.
+    BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+                                                     "eh.sjlj.setjmp.cont");
+
+    // Populate the Function Context
+    //   1. LSDA address
+    //   2. Personality function address
+    //   3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+    // LSDA address
+    Idxs[0] = Zero;
+    Idxs[1] = ConstantInt::get(Int32Ty, 4);
+    Value *LSDAFieldPtr =
+      GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                "lsda_gep",
+                                EntryBB->getTerminator());
+    Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+                                   EntryBB->getTerminator());
+    new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+    Idxs[1] = ConstantInt::get(Int32Ty, 3);
+    Value *PersonalityFieldPtr =
+      GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                "personality_gep",
+                                EntryBB->getTerminator());
+    new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+                  EntryBB->getTerminator());
+
+    // Save the frame pointer.
+    Idxs[1] = ConstantInt::get(Int32Ty, 5);
+    Value *JBufPtr
+      = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                  "jbuf_gep",
+                                  EntryBB->getTerminator());
+    Idxs[1] = ConstantInt::get(Int32Ty, 0);
+    Value *FramePtr =
+      GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+                                EntryBB->getTerminator());
+
+    Value *Val = CallInst::Create(FrameAddrFn,
+                                  ConstantInt::get(Int32Ty, 0),
+                                  "fp",
+                                  EntryBB->getTerminator());
+    new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+    // Save the stack pointer.
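+    // (jbuf slot 2 holds the SP; slot 0 already holds the FP saved above,
+    // and the eh.sjlj.setjmp call below fills in the resume address.)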
+    Idxs[1] = ConstantInt::get(Int32Ty, 2);
+    Value *StackPtr =
+      GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+                                EntryBB->getTerminator());
+
+    Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+    new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+    // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+    Value *SetjmpArg =
+      CastInst::Create(Instruction::BitCast, JBufPtr,
+                       Type::getInt8PtrTy(F.getContext()), "",
+                       EntryBB->getTerminator());
+    Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+                                          "dispatch",
+                                          EntryBB->getTerminator());
+    // Check the return value of the setjmp; non-zero goes to the dispatcher.
+    Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+                                   ICmpInst::ICMP_EQ, DispatchVal, Zero,
+                                   "notunwind");
+    // Nuke the uncond branch.
+    EntryBB->getTerminator()->eraseFromParent();
+
+    // Put in a new condbranch in its place.
+    BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+    // Register the function context and make sure it's known to not throw.
+    CallInst *Register =
+      CallInst::Create(RegisterFn, FunctionContext, "",
+                       ContBlock->getTerminator());
+    Register->setDoesNotThrow();
+
+    // At this point, we are all set up; update the invoke instructions
+    // to mark their call_site values, and fill in the dispatch switch
+    // accordingly.
+    for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+      markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+    // Mark call instructions that aren't nounwind as no-action
+    // (call_site == -1). Skip the entry block, as prior to then, no function
+    // context has been created for this function and any unexpected exceptions
+    // thrown will go directly to the caller's context, which is what we want
+    // anyway, so no need to do anything here.
+    for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+      for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+        if (CallInst *CI = dyn_cast<CallInst>(I)) {
+          // Ignore calls to the EH builtins (eh.selector, eh.exception)
+          Constant *Callee = CI->getCalledFunction();
+          if (Callee != SelectorFn && Callee != ExceptionFn
+              && !CI->doesNotThrow())
+            insertCallSiteStore(CI, -1, CallSite);
+        }
+    }
+
+    // Replace all unwinds with a branch to the unwind handler.
+    // ??? Should this ever happen with sjlj exceptions?
+    for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+      BranchInst::Create(UnwindBlock, Unwinds[i]);
+      Unwinds[i]->eraseFromParent();
+    }
+
+    // Following any allocas not in the entry block, update the saved SP
+    // in the jmpbuf to the new value.
+    for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+      Instruction *AI = JmpbufUpdatePoints[i];
+      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+      StackAddr->insertAfter(AI);
+      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+      StoreStackAddr->insertAfter(StackAddr);
+    }
+
+    // Finally, for any returns from this function, if this function contains an
+    // invoke, add a call to unregister the function context.
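+    // (Unregistering pops this frame's context off the runtime's chain of
+    // registered contexts, so a later unwind can never dispatch into a frame
+    // that has already returned.)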
+    for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+      CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+  }
+
+  return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+  bool Res = insertSjLjEHSupport(F);
+  return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 0000000..1bc148f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,229 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+
+// Yep - these are thread safe. See the header for details.
+namespace {
+
+
+  class EmptyIndexListEntry : public IndexListEntry {
+  public:
+    EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
+  };
+
+  class TombstoneIndexListEntry : public IndexListEntry {
+  public:
+    TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
+  };
+
+  // The following statics are thread safe. They're read only, and you
+  // can't step from them to any other list entries.
+  ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
+  ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
+}
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+                "Slot index numbering", false, false);
+
+IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
+  return &*IndexListEntryEmptyKey;
+}
+
+IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
+  return &*IndexListEntryTombstoneKey;
+}
+
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+  au.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+  mi2iMap.clear();
+  mbb2IdxMap.clear();
+  idx2MBBMap.clear();
+  terminatorGaps.clear();
+  clearList();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+  // Compute numbering as follows:
+  // Grab an iterator to the start of the index list.
+  // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+  // iterator in lock-step (though skipping it over indexes which have
+  // null pointers in the instruction field).
+  // At each iteration assert that the instruction pointed to in the index
+  // is the same one pointed to by the MI iterator. This
+
+  // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+  // only need to be set up once after the first numbering is computed.
+
+  mf = &fn;
+  initList();
+
+  // Check that the list contains only the sentinel.
+  assert(indexListHead->getNext() == 0 &&
+         "Index list non-empty at initial numbering?");
+  assert(idx2MBBMap.empty() &&
+         "Index -> MBB mapping non-empty at initial numbering?");
+  assert(mbb2IdxMap.empty() &&
+         "MBB -> Index mapping non-empty at initial numbering?");
+  assert(mi2iMap.empty() &&
+         "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+  functionSize = 0;
+  unsigned index = 0;
+
+  push_back(createEntry(0, index));
+
+  // Iterate over the function.
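+  // (Illustratively, assuming SlotIndex::NUM == 4 and single-def
+  // instructions: a block holding two instructions and no terminator is
+  // numbered roughly
+  //   0:<block start>  4:MI#1  12:MI#2  20:<terminator gap>  24:<blank>
+  // since each instruction reserves (NumDefs + 1) * NUM index values.)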
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); + mbbItr != mbbEnd; ++mbbItr) { + MachineBasicBlock *mbb = &*mbbItr; + + // Insert an index for the MBB start. + SlotIndex blockStartIndex(back(), SlotIndex::LOAD); + + index += SlotIndex::NUM; + + for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); + miItr != miEnd; ++miItr) { + MachineInstr *mi = miItr; + if (mi->isDebugValue()) + continue; + + if (miItr == mbb->getFirstTerminator()) { + push_back(createEntry(0, index)); + terminatorGaps.insert( + std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); + index += SlotIndex::NUM; + } + + // Insert a store index for the instr. + push_back(createEntry(mi, index)); + + // Save this base index in the maps. + mi2iMap.insert( + std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + + ++functionSize; + + unsigned Slots = mi->getDesc().getNumDefs(); + if (Slots == 0) + Slots = 1; + + index += (Slots + 1) * SlotIndex::NUM; + } + + if (mbb->getFirstTerminator() == mbb->end()) { + push_back(createEntry(0, index)); + terminatorGaps.insert( + std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); + index += SlotIndex::NUM; + } + + // One blank instruction at the end. + push_back(createEntry(0, index)); + + SlotIndex blockEndIndex(back(), SlotIndex::LOAD); + mbb2IdxMap.insert( + std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex))); + + idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); + } + + // Sort the Idx2MBBMap + std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + + DEBUG(dump()); + + // And we're done! + return false; +} + +void SlotIndexes::renumberIndexes() { + + // Renumber updates the index of every element of the index list. + // If all instrs in the function have been allocated an index (which has been + // placed in the index list in the order of instruction iteration) then the + // resulting numbering will match what would have been generated by the + // pass during the initial numbering of the function if the new instructions + // had been present. + + functionSize = 0; + unsigned index = 0; + + for (IndexListEntry *curEntry = front(); curEntry != getTail(); + curEntry = curEntry->getNext()) { + + curEntry->setIndex(index); + + if (curEntry->getInstr() == 0) { + // MBB start entry or terminator gap. Just step index by 1. + index += SlotIndex::NUM; + } + else { + ++functionSize; + unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs(); + if (Slots == 0) + Slots = 1; + + index += (Slots + 1) * SlotIndex::NUM; + } + } +} + +void SlotIndexes::dump() const { + for (const IndexListEntry *itr = front(); itr != getTail(); + itr = itr->getNext()) { + dbgs() << itr->getIndex() << " "; + + if (itr->getInstr() != 0) { + dbgs() << *itr->getInstr(); + } else { + dbgs() << "\n"; + } + } + + for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); + itr != mbb2IdxMap.end(); ++itr) { + dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" + << itr->second.first << ", " << itr->second.second << "]\n"; + } +} + +// Print a SlotIndex to a raw_ostream. +void SlotIndex::print(raw_ostream &os) const { + os << entry().getIndex(); + if (isPHI()) + os << "*"; + else + os << "LudS"[getSlot()]; +} + +// Dump a SlotIndex to stderr. 
+void SlotIndex::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp new file mode 100644 index 0000000..59d5ab3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -0,0 +1,517 @@ +//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" + +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <set> + +using namespace llvm; + +namespace { + enum SpillerName { trivial, standard, splitting, inline_ }; +} + +static cl::opt<SpillerName> +spillerOpt("spiller", + cl::desc("Spiller to use: (default: standard)"), + cl::Prefix, + cl::values(clEnumVal(trivial, "trivial spiller"), + clEnumVal(standard, "default spiller"), + clEnumVal(splitting, "splitting spiller"), + clEnumValN(inline_, "inline", "inline spiller"), + clEnumValEnd), + cl::init(standard)); + +// Spiller virtual destructor implementation. +Spiller::~Spiller() {} + +namespace { + +/// Utility class for spillers. +class SpillerBase : public Spiller { +protected: + MachineFunctionPass *pass; + MachineFunction *mf; + VirtRegMap *vrm; + LiveIntervals *lis; + MachineFrameInfo *mfi; + MachineRegisterInfo *mri; + const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; + + /// Construct a spiller base. + SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) + : pass(&pass), mf(&mf), vrm(&vrm) + { + lis = &pass.getAnalysis<LiveIntervals>(); + mfi = mf.getFrameInfo(); + mri = &mf.getRegInfo(); + tii = mf.getTarget().getInstrInfo(); + tri = mf.getTarget().getRegisterInfo(); + } + + /// Add spill ranges for every use/def of the live interval, inserting loads + /// immediately before each use, and stores after each def. No folding or + /// remat is attempted. + void trivialSpillEverywhere(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &newIntervals) { + DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); + + assert(li->weight != HUGE_VALF && + "Attempting to spill already spilled value."); + + assert(!li->isStackSlot() && + "Trying to spill a stack slot."); + + DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); + + const TargetRegisterClass *trc = mri->getRegClass(li->reg); + unsigned ss = vrm->assignVirt2StackSlot(li->reg); + + // Iterate over reg uses/defs. + for (MachineRegisterInfo::reg_iterator + regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) { + + // Grab the use/def instr. + MachineInstr *mi = &*regItr; + + DEBUG(dbgs() << " Processing " << *mi); + + // Step regItr to the next use/def instr. + do { + ++regItr; + } while (regItr != mri->reg_end() && (&*regItr == mi)); + + // Collect uses & defs for this instr. 
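
The placement policy documented above (a reload before every reading instruction, a store after every writing one, each with a fresh vreg) can be sketched on a toy IR. The Instr struct and vreg numbering here are invented for illustration, not LLVM's API:

    #include <cstdio>
    #include <vector>

    struct Instr { bool reads, writes; };        // invented toy IR

    int main() {
      std::vector<Instr> block = {{false, true}, {true, true}, {true, false}};
      int nextVReg = 100;                        // hypothetical fresh-vreg counter
      for (const Instr &in : block) {
        int v = nextVReg++;                      // new vreg per instruction
        if (in.reads)
          std::printf("  reload v%d from the stack slot\n", v);
        std::printf("  original instr, rewritten to use v%d\n", v);
        if (in.writes)
          std::printf("  store v%d back to the stack slot\n", v);
      }
      return 0;
    }
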
+ SmallVector<unsigned, 2> indices; + bool hasUse = false; + bool hasDef = false; + for (unsigned i = 0; i != mi->getNumOperands(); ++i) { + MachineOperand &op = mi->getOperand(i); + if (!op.isReg() || op.getReg() != li->reg) + continue; + hasUse |= mi->getOperand(i).isUse(); + hasDef |= mi->getOperand(i).isDef(); + indices.push_back(i); + } + + // Create a new vreg & interval for this instr. + unsigned newVReg = mri->createVirtualRegister(trc); + vrm->grow(); + vrm->assignVirt2StackSlot(newVReg, ss); + LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + newLI->weight = HUGE_VALF; + + // Update the reg operands & kill flags. + for (unsigned i = 0; i < indices.size(); ++i) { + unsigned mopIdx = indices[i]; + MachineOperand &mop = mi->getOperand(mopIdx); + mop.setReg(newVReg); + if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { + mop.setIsKill(true); + } + } + assert(hasUse || hasDef); + + // Insert reload if necessary. + MachineBasicBlock::iterator miItr(mi); + if (hasUse) { + tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc, + tri); + MachineInstr *loadInstr(prior(miItr)); + SlotIndex loadIndex = + lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, loadInstr); + SlotIndex endIndex = loadIndex.getNextIndex(); + VNInfo *loadVNI = + newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); + newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); + } + + // Insert store if necessary. + if (hasDef) { + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, + true, ss, trc, tri); + MachineInstr *storeInstr(llvm::next(miItr)); + SlotIndex storeIndex = + lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, storeInstr); + SlotIndex beginIndex = storeIndex.getPrevIndex(); + VNInfo *storeVNI = + newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); + newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); + } + + newIntervals.push_back(newLI); + } + } +}; + +} // end anonymous namespace + +namespace { + +/// Spills any live range using the spill-everywhere method with no attempt at +/// folding. +class TrivialSpiller : public SpillerBase { +public: + + TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : SpillerBase(pass, mf, vrm) {} + + void spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &) { + // Ignore spillIs - we don't use it. + trivialSpillEverywhere(li, newIntervals); + } +}; + +} // end anonymous namespace + +namespace { + +/// Falls back on LiveIntervals::addIntervalsForSpills. +class StandardSpiller : public Spiller { +protected: + LiveIntervals *lis; + MachineLoopInfo *loopInfo; + VirtRegMap *vrm; +public: + StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : lis(&pass.getAnalysis<LiveIntervals>()), + loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), + vrm(&vrm) {} + + /// Falls back on LiveIntervals::addIntervalsForSpills. + void spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { + std::vector<LiveInterval*> added = + lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + newIntervals.insert(newIntervals.end(), added.begin(), added.end()); + } +}; + +} // end anonymous namespace + +namespace { + +/// When a call to spill is placed this spiller will first try to break the +/// interval up into its component values (one new interval per value). 
+/// If this fails, or if a call is placed to spill a previously split interval
+/// then the spiller falls back on the standard spilling mechanism.
+class SplittingSpiller : public StandardSpiller {
+public:
+  SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+                   VirtRegMap &vrm)
+    : StandardSpiller(pass, mf, vrm) {
+    mri = &mf.getRegInfo();
+    tii = mf.getTarget().getInstrInfo();
+    tri = mf.getTarget().getRegisterInfo();
+  }
+
+  void spill(LiveInterval *li,
+             SmallVectorImpl<LiveInterval*> &newIntervals,
+             SmallVectorImpl<LiveInterval*> &spillIs) {
+    if (worthTryingToSplit(li))
+      tryVNISplit(li);
+    else
+      StandardSpiller::spill(li, newIntervals, spillIs);
+  }
+
+private:
+
+  MachineRegisterInfo *mri;
+  const TargetInstrInfo *tii;
+  const TargetRegisterInfo *tri;
+  DenseSet<LiveInterval*> alreadySplit;
+
+  bool worthTryingToSplit(LiveInterval *li) const {
+    return (!alreadySplit.count(li) && li->getNumValNums() > 1);
+  }
+
+  /// Try to break a LiveInterval into its component values.
+  std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) {
+
+    DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
+
+    std::vector<LiveInterval*> added;
+    SmallVector<VNInfo*, 4> vnis;
+
+    std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
+
+    for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
+         vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
+      VNInfo *vni = *vniItr;
+
+      // Skip unused VNIs.
+      if (vni->isUnused())
+        continue;
+
+      DEBUG(dbgs() << "  Extracted Val #" << vni->id << " as ");
+      LiveInterval *splitInterval = extractVNI(li, vni);
+
+      if (splitInterval != 0) {
+        DEBUG(dbgs() << *splitInterval << "\n");
+        added.push_back(splitInterval);
+        alreadySplit.insert(splitInterval);
+      } else {
+        DEBUG(dbgs() << "0\n");
+      }
+    }
+
+    DEBUG(dbgs() << "Original LI: " << *li << "\n");
+
+    // If the original interval still contains some live ranges,
+    // add it to added and alreadySplit.
+    if (!li->empty()) {
+      added.push_back(li);
+      alreadySplit.insert(li);
+    }
+
+    return added;
+  }
+
+  /// Extract the given value number from the interval.
+  LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
+    assert(vni->isDefAccurate() || vni->isPHIDef());
+
+    // Create a new vreg and live interval, copy VNI ranges over.
+    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+    unsigned newVReg = mri->createVirtualRegister(trc);
+    vrm->grow();
+    LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+    VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
+
+    // Start by copying all live ranges in the VN to the new interval.
+    for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
+         rItr != rEnd; ++rItr) {
+      if (rItr->valno == vni) {
+        newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
+      }
+    }
+
+    // Erase the old VNI & ranges.
+    li->removeValNo(vni);
+
+    // Collect all current uses of the register belonging to the given VNI.
+    // We'll use this to rename the register after we've dealt with the def.
+    std::set<MachineInstr*> uses;
+    for (MachineRegisterInfo::use_iterator
+         useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
+         useItr != useEnd; ++useItr) {
+      uses.insert(&*useItr);
+    }
+
+    // Process the def instruction for this VNI.
+    if (newVNI->isPHIDef()) {
+      // Insert a copy at the start of the MBB. The range preceding the
+      // copy will be attached to the original LiveInterval.
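
The extraction step being performed here, reduced to plain containers: ranges tagged with the chosen value number move to a new interval while the rest stay behind. This is only a schematic of the idea, not LLVM's LiveInterval API:

    #include <cstdio>
    #include <vector>

    struct Range { unsigned start, end, valno; };  // schematic live range

    int main() {
      std::vector<Range> li = {{0, 10, 0}, {10, 20, 1}, {25, 40, 1}};
      const unsigned vni = 1;                      // value number to extract
      std::vector<Range> newLI, rest;
      for (const Range &r : li)
        (r.valno == vni ? newLI : rest).push_back(r);
      li.swap(rest);
      std::printf("kept %zu ranges, extracted %zu\n", li.size(), newLI.size());
      return 0;
    }
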
+ MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); + MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), + 0, false, lis->getVNInfoAllocator()); + phiDefVNI->setIsPHIDef(true); + li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); + LiveRange *oldPHIDefRange = + newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); + + // If the old phi def starts in the middle of the range chop it up. + if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) { + LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end, + oldPHIDefRange->valno); + oldPHIDefRange->end = lis->getMBBStartIdx(defMBB); + newLI->addRange(oldPHIDefRange2); + } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) { + // Otherwise if it's at the start of the range just trim it. + oldPHIDefRange->start = copyIdx.getDefIndex(); + } else { + assert(false && "PHI def range doesn't cover PHI def?"); + } + + newVNI->def = copyIdx.getDefIndex(); + newVNI->setCopy(copyMI); + newVNI->setIsPHIDef(false); // not a PHI def anymore. + newVNI->setIsDefAccurate(true); + } else { + // non-PHI def. Rename the def. If it's two-addr that means renaming the + // use and inserting a new copy too. + MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); + // We'll rename this now, so we can remove it from uses. + uses.erase(defInst); + unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg); + bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx), + twoAddrUseIsUndef = false; + + for (unsigned i = 0; i < defInst->getNumOperands(); ++i) { + MachineOperand &mo = defInst->getOperand(i); + if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) { + mo.setReg(newVReg); + if (isTwoAddr && mo.isUse() && mo.isUndef()) + twoAddrUseIsUndef = true; + } + } + + SlotIndex defIdx = lis->getInstructionIndex(defInst); + newVNI->def = defIdx.getDefIndex(); + + if (isTwoAddr && !twoAddrUseIsUndef) { + MachineBasicBlock *defMBB = defInst->getParent(); + MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + LiveRange *origUseRange = + li->getLiveRangeContaining(newVNI->def.getUseIndex()); + origUseRange->end = copyIdx.getDefIndex(); + VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, + true, lis->getVNInfoAllocator()); + LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); + newLI->addRange(copyRange); + } + } + + for (std::set<MachineInstr*>::iterator + usesItr = uses.begin(), usesEnd = uses.end(); + usesItr != usesEnd; ++usesItr) { + MachineInstr *useInst = *usesItr; + SlotIndex useIdx = lis->getInstructionIndex(useInst); + LiveRange *useRange = + newLI->getLiveRangeContaining(useIdx.getUseIndex()); + + // If this use doesn't belong to the new interval skip it. + if (useRange == 0) + continue; + + // This use doesn't belong to the VNI, skip it. + if (useRange->valno != newVNI) + continue; + + // Check if this instr is two address. + unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); + bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); + + // Rename uses (and defs for two-address instrs). 
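
The two-address case handled above deserves a concrete picture: in "a += b" the def operand is tied to a use, so renaming the def to a fresh vreg forces a COPY of the incoming value first. A scalar analogue of that rewrite (made-up values, purely illustrative):

    #include <cstdio>

    int main() {
      int oldV = 5, b = 2;
      // Renaming the def of "oldV += b" to newV leaves newV uninitialized,
      // so a COPY of the incoming value must be inserted first.
      int newV = oldV;  // the inserted COPY (kills oldV)
      newV += b;        // the renamed two-address def: newV = ADD newV, b
      std::printf("result lives in newV = %d\n", newV);
      return 0;
    }
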
+      for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
+        MachineOperand &mo = useInst->getOperand(i);
+        if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
+            (mo.getReg() == li->reg)) {
+          mo.setReg(newVReg);
+        }
+      }
+
+      // If this is a two address instruction we've got some extra work to do.
+      if (isTwoAddress) {
+        // We modified the def operand, so we need to copy back to the original
+        // reg.
+        MachineBasicBlock *useMBB = useInst->getParent();
+        MachineBasicBlock::iterator useItr(useInst);
+        MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
+                                       tii->get(TargetOpcode::COPY), newVReg)
+                                 .addReg(li->reg, RegState::Kill);
+        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+        // Change the old two-address defined range & vni to start at
+        // (and be defined by) the copy.
+        LiveRange *origDefRange =
+          li->getLiveRangeContaining(useIdx.getDefIndex());
+        origDefRange->start = copyIdx.getDefIndex();
+        origDefRange->valno->def = copyIdx.getDefIndex();
+        origDefRange->valno->setCopy(copyMI);
+
+        // Insert a new range & vni for the two-address-to-copy value. This
+        // will be attached to the new live interval.
+        VNInfo *copyVNI =
+          newLI->getNextValue(useIdx.getDefIndex(), 0, true,
+                              lis->getVNInfoAllocator());
+        LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
+        newLI->addRange(copyRange);
+      }
+    }
+
+    // Iterate over any PHI kills - we'll need to insert new copies for them.
+    for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
+         LRI != LRE; ++LRI) {
+      if (LRI->valno != newVNI || LRI->end.isPHI())
+        continue;
+      SlotIndex killIdx = LRI->end;
+      MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+      MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
+                                     DebugLoc(), tii->get(TargetOpcode::COPY),
+                                     li->reg)
+                               .addReg(newVReg, RegState::Kill);
+      SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+      // Save the current end. We may need it to add a new range if the
+      // current range runs off the end of the MBB.
+      SlotIndex newKillRangeEnd = LRI->end;
+      LRI->end = copyIdx.getDefIndex();
+
+      if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
+        assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
+               "PHI kill range doesn't reach kill-block end.
Not sane."); + newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB), + newKillRangeEnd, newVNI)); + } + + VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), + copyMI, true, + lis->getVNInfoAllocator()); + newKillVNI->setHasPHIKill(true); + li->addRange(LiveRange(copyIdx.getDefIndex(), + lis->getMBBEndIdx(killMBB), + newKillVNI)); + } + newVNI->setHasPHIKill(false); + + return newLI; + } + +}; + +} // end anonymous namespace + + +namespace llvm { +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); +} + +llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + switch (spillerOpt) { + default: assert(0 && "unknown spiller"); + case trivial: return new TrivialSpiller(pass, mf, vrm); + case standard: return new StandardSpiller(pass, mf, vrm); + case splitting: return new SplittingSpiller(pass, mf, vrm); + case inline_: return createInlineSpiller(pass, mf, vrm); + } +} diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h new file mode 100644 index 0000000..59bc0ec --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Spiller.h @@ -0,0 +1,50 @@ +//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPILLER_H +#define LLVM_CODEGEN_SPILLER_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + + class LiveInterval; + class MachineFunction; + class MachineFunctionPass; + class SlotIndex; + class VirtRegMap; + + /// Spiller interface. + /// + /// Implementations are utility classes which insert spill or remat code on + /// demand. + class Spiller { + public: + virtual ~Spiller() = 0; + + /// spill - Spill the given live interval. The method used will depend on + /// the Spiller implementation selected. + /// + /// @param li The live interval to be spilled. + /// @param spillIs A list of intervals that are about to be spilled, + /// and so cannot be used for remat etc. + /// @param newIntervals The newly created intervals will be appended here. + virtual void spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) = 0; + + }; + + /// Create and return a spiller object, as specified on the command line. + Spiller* createSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp new file mode 100644 index 0000000..29474f0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -0,0 +1,1097 @@ +//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "splitter" +#include "SplitKit.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> +AllowSplit("spiller-splits-edges", + cl::desc("Allow critical edge splitting during spilling")); + +//===----------------------------------------------------------------------===// +// Split Analysis +//===----------------------------------------------------------------------===// + +SplitAnalysis::SplitAnalysis(const MachineFunction &mf, + const LiveIntervals &lis, + const MachineLoopInfo &mli) + : mf_(mf), + lis_(lis), + loops_(mli), + tii_(*mf.getTarget().getInstrInfo()), + curli_(0) {} + +void SplitAnalysis::clear() { + usingInstrs_.clear(); + usingBlocks_.clear(); + usingLoops_.clear(); + curli_ = 0; +} + +bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { + MachineBasicBlock *T, *F; + SmallVector<MachineOperand, 4> Cond; + return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); +} + +/// analyzeUses - Count instructions, basic blocks, and loops using curli. +void SplitAnalysis::analyzeUses() { + const MachineRegisterInfo &MRI = mf_.getRegInfo(); + for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg); + MachineInstr *MI = I.skipInstruction();) { + if (MI->isDebugValue() || !usingInstrs_.insert(MI)) + continue; + MachineBasicBlock *MBB = MI->getParent(); + if (usingBlocks_[MBB]++) + continue; + if (MachineLoop *Loop = loops_.getLoopFor(MBB)) + usingLoops_[Loop]++; + } + DEBUG(dbgs() << " counted " + << usingInstrs_.size() << " instrs, " + << usingBlocks_.size() << " blocks, " + << usingLoops_.size() << " loops.\n"); +} + +/// removeUse - Update statistics by noting that MI no longer uses curli. +void SplitAnalysis::removeUse(const MachineInstr *MI) { + if (!usingInstrs_.erase(MI)) + return; + + // Decrement MBB count. + const MachineBasicBlock *MBB = MI->getParent(); + BlockCountMap::iterator bi = usingBlocks_.find(MBB); + assert(bi != usingBlocks_.end() && "MBB missing"); + assert(bi->second && "0 count in map"); + if (--bi->second) + return; + // No more uses in MBB. + usingBlocks_.erase(bi); + + // Decrement loop count. + MachineLoop *Loop = loops_.getLoopFor(MBB); + if (!Loop) + return; + LoopCountMap::iterator li = usingLoops_.find(Loop); + assert(li != usingLoops_.end() && "Loop missing"); + assert(li->second && "0 count in map"); + if (--li->second) + return; + // No more blocks in Loop. + usingLoops_.erase(li); +} + +// Get three sets of basic blocks surrounding a loop: Blocks inside the loop, +// predecessor blocks, and exit blocks. +void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) { + Blocks.clear(); + + // Blocks in the loop. + Blocks.Loop.insert(Loop->block_begin(), Loop->block_end()); + + // Predecessor blocks. + const MachineBasicBlock *Header = Loop->getHeader(); + for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(), + E = Header->pred_end(); I != E; ++I) + if (!Blocks.Loop.count(*I)) + Blocks.Preds.insert(*I); + + // Exit blocks. 
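
The exit-block computation that follows is simple set arithmetic: any successor of a loop block that is not itself in the loop is an exit. The same idea on a toy CFG, with invented block numbers:

    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    int main() {
      std::set<int> loop = {1, 2, 3};              // invented block numbers
      std::map<int, std::vector<int>> succs = {
          {1, {2}}, {2, {3}}, {3, {1, 4}}};        // edge 3 -> 4 leaves the loop
      std::set<int> exits;
      for (int bb : loop)
        for (int s : succs[bb])
          if (!loop.count(s))
            exits.insert(s);
      for (int e : exits)
        std::printf("exit block %d\n", e);
      return 0;
    }
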
+ for (MachineLoop::block_iterator I = Loop->block_begin(), + E = Loop->block_end(); I != E; ++I) { + const MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if (!Blocks.Loop.count(*SI)) + Blocks.Exits.insert(*SI); + } +} + +/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in +/// and around the Loop. +SplitAnalysis::LoopPeripheralUse SplitAnalysis:: +analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) { + LoopPeripheralUse use = ContainedInLoop; + for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); + I != E; ++I) { + const MachineBasicBlock *MBB = I->first; + // Is this a peripheral block? + if (use < MultiPeripheral && + (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) { + if (I->second > 1) use = MultiPeripheral; + else use = SinglePeripheral; + continue; + } + // Is it a loop block? + if (Blocks.Loop.count(MBB)) + continue; + // It must be an unrelated block. + return OutsideLoop; + } + return use; +} + +/// getCriticalExits - It may be necessary to partially break critical edges +/// leaving the loop if an exit block has phi uses of curli. Collect the exit +/// blocks that need special treatment into CriticalExits. +void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, + BlockPtrSet &CriticalExits) { + CriticalExits.clear(); + + // A critical exit block contains a phi def of curli, and has a predecessor + // that is not in the loop nor a loop predecessor. + // For such an exit block, the edges carrying the new variable must be moved + // to a new pre-exit block. + for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end(); + I != E; ++I) { + const MachineBasicBlock *Succ = *I; + SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ); + VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx); + // This exit may not have curli live in at all. No need to split. + if (!SuccVNI) + continue; + // If this is not a PHI def, it is either using a value from before the + // loop, or a value defined inside the loop. Both are safe. + if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx) + continue; + // This exit block does have a PHI. Does it also have a predecessor that is + // not a loop block or loop predecessor? + for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), + PE = Succ->pred_end(); PI != PE; ++PI) { + const MachineBasicBlock *Pred = *PI; + if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred)) + continue; + // This is a critical exit block, and we need to split the exit edge. + CriticalExits.insert(Succ); + break; + } + } +} + +/// canSplitCriticalExits - Return true if it is possible to insert new exit +/// blocks before the blocks in CriticalExits. +bool +SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, + BlockPtrSet &CriticalExits) { + // If we don't allow critical edge splitting, require no critical exits. + if (!AllowSplit) + return CriticalExits.empty(); + + for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end(); + I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // We want to insert a new pre-exit MBB before Succ, and change all the + // in-loop blocks to branch to the pre-exit instead of Succ. + // Check that all the in-loop predecessors can be changed. 
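
The predecessor test below mirrors the critical-exit definition given above: an exit block is critical when some predecessor is neither in the loop nor a loop predecessor. Sketched on a toy CFG, all data invented:

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      std::set<int> loop = {1, 2}, preds = {0};  // invented CFG
      std::vector<int> exitPreds = {2, 7};       // block 7 is unrelated
      bool critical = false;
      for (int p : exitPreds)
        if (!loop.count(p) && !preds.count(p))
          critical = true;                       // the edge from 7 must be split
      std::printf("exit is %scritical\n", critical ? "" : "not ");
      return 0;
    }
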
+ for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), + PE = Succ->pred_end(); PI != PE; ++PI) { + const MachineBasicBlock *Pred = *PI; + // The external predecessors won't be altered. + if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred)) + continue; + if (!canAnalyzeBranch(Pred)) + return false; + } + + // If Succ's layout predecessor falls through, that too must be analyzable. + // We need to insert the pre-exit block in the gap. + MachineFunction::const_iterator MFI = Succ; + if (MFI == mf_.begin()) + continue; + if (!canAnalyzeBranch(--MFI)) + return false; + } + // No problems found. + return true; +} + +void SplitAnalysis::analyze(const LiveInterval *li) { + clear(); + curli_ = li; + analyzeUses(); +} + +const MachineLoop *SplitAnalysis::getBestSplitLoop() { + assert(curli_ && "Call analyze() before getBestSplitLoop"); + if (usingLoops_.empty()) + return 0; + + LoopPtrSet Loops, SecondLoops; + LoopBlocks Blocks; + BlockPtrSet CriticalExits; + + // Find first-class and second class candidate loops. + // We prefer to split around loops where curli is used outside the periphery. + for (LoopCountMap::const_iterator I = usingLoops_.begin(), + E = usingLoops_.end(); I != E; ++I) { + const MachineLoop *Loop = I->first; + getLoopBlocks(Loop, Blocks); + + // FIXME: We need an SSA updater to properly handle multiple exit blocks. + if (Blocks.Exits.size() > 1) { + DEBUG(dbgs() << " multiple exits from " << *Loop); + continue; + } + + LoopPtrSet *LPS = 0; + switch(analyzeLoopPeripheralUse(Blocks)) { + case OutsideLoop: + LPS = &Loops; + break; + case MultiPeripheral: + LPS = &SecondLoops; + break; + case ContainedInLoop: + DEBUG(dbgs() << " contained in " << *Loop); + continue; + case SinglePeripheral: + DEBUG(dbgs() << " single peripheral use in " << *Loop); + continue; + } + // Will it be possible to split around this loop? + getCriticalExits(Blocks, CriticalExits); + DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from " + << *Loop); + if (!canSplitCriticalExits(Blocks, CriticalExits)) + continue; + // This is a possible split. + assert(LPS); + LPS->insert(Loop); + } + + DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + " + << SecondLoops.size() << " candidate loops.\n"); + + // If there are no first class loops available, look at second class loops. + if (Loops.empty()) + Loops = SecondLoops; + + if (Loops.empty()) + return 0; + + // Pick the earliest loop. + // FIXME: Are there other heuristics to consider? + const MachineLoop *Best = 0; + SlotIndex BestIdx; + for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E; + ++I) { + SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader()); + if (!Best || Idx < BestIdx) + Best = *I, BestIdx = Idx; + } + DEBUG(dbgs() << " getBestSplitLoop found " << *Best); + return Best; +} + +/// getMultiUseBlocks - if curli has more than one use in a basic block, it +/// may be an advantage to split curli for the duration of the block. +bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { + // If curli is local to one block, there is no point to splitting it. + if (usingBlocks_.size() <= 1) + return false; + // Add blocks with multiple uses. + for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); + I != E; ++I) + switch (I->second) { + case 0: + case 1: + continue; + case 2: { + // It doesn't pay to split a 2-instr block if it redefines curli. 
+ VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first)); + VNInfo *VN2 = + curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex()); + // live-in and live-out with a different value. + if (VN1 && VN2 && VN1 != VN2) + continue; + } // Fall through. + default: + Blocks.insert(I->first); + } + return !Blocks.empty(); +} + +//===----------------------------------------------------------------------===// +// LiveIntervalMap +//===----------------------------------------------------------------------===// + +// defValue - Introduce a li_ def for ParentVNI that could be later than +// ParentVNI->def. +VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Is this a simple 1-1 mapping? Not likely. + if (Idx == ParentVNI->def) + return mapValue(ParentVNI, Idx); + + // This is a complex def. Mark with a NULL in valueMap. + VNInfo *OldVNI = + valueMap_.insert( + ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second; + // The static_cast<VNInfo *> is only needed to work around a bug in an + // old version of the C++0x standard which the following compilers + // implemented and have yet to fix: + // + // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 + // + // If/When we move to C++0x, this can be replaced by nullptr. + (void)OldVNI; + assert(OldVNI == 0 && "Simple/Complex values mixed"); + + // Should we insert a minimal snippet of VNI LiveRange, or can we count on + // callers to do that? We need it for lookups of complex values. + VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator()); + return VNI; +} + +// mapValue - Find the mapped value for ParentVNI at Idx. +// Potentially create phi-def values. +VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator,bool> InsP = + valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))); + // The static_cast<VNInfo *> is only needed to work around a bug in an + // old version of the C++0x standard which the following compilers + // implemented and have yet to fix: + // + // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 + // + // If/When we move to C++0x, this can be replaced by nullptr. + + // This was an unknown value. Create a simple mapping. + if (InsP.second) + return InsP.first->second = li_.createValueCopy(ParentVNI, + lis_.getVNInfoAllocator()); + // This was a simple mapped value. + if (InsP.first->second) + return InsP.first->second; + + // This is a complex mapped value. There may be multiple defs, and we may need + // to create phi-defs. + MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx); + assert(IdxMBB && "No MBB at Idx"); + + // Is there a def in the same MBB we can extend? + if (VNInfo *VNI = extendTo(IdxMBB, Idx)) + return VNI; + + // Now for the fun part. We know that ParentVNI potentially has multiple defs, + // and we may need to create even more phi-defs to preserve VNInfo SSA form. 
+ // Perform a depth-first search for predecessor blocks where we know the + // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + + // Track MBBs where we have created or learned the dominating value. + // This may change during the DFS as we create new phi-defs. + typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap; + MBBValueMap DomValue; + + for (idf_iterator<MachineBasicBlock*> + IDFI = idf_begin(IdxMBB), + IDFE = idf_end(IdxMBB); IDFI != IDFE;) { + MachineBasicBlock *MBB = *IDFI; + SlotIndex End = lis_.getMBBEndIdx(MBB); + + // We are operating on the restricted CFG where ParentVNI is live. + if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) { + IDFI.skipChildren(); + continue; + } + + // Do we have a dominating value in this block? + VNInfo *VNI = extendTo(MBB, End); + if (!VNI) { + ++IDFI; + continue; + } + + // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs + // as needed along the way. + for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) { + // Start from MBB's immediate successor. End at IdxMBB. + MachineBasicBlock *Succ = IDFI.getPath(PI-1); + std::pair<MBBValueMap::iterator, bool> InsP = + DomValue.insert(MBBValueMap::value_type(Succ, VNI)); + + // This is the first time we backtrack to Succ. + if (InsP.second) + continue; + + // We reached Succ again with the same VNI. Nothing is going to change. + VNInfo *OVNI = InsP.first->second; + if (OVNI == VNI) + break; + + // Succ already has a phi-def. No need to continue. + SlotIndex Start = lis_.getMBBStartIdx(Succ); + if (OVNI->def == Start) + break; + + // We have a collision between the old and new VNI at Succ. That means + // neither dominates and we need a new phi-def. + VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + InsP.first->second = VNI; + + // Replace OVNI with VNI in the remaining path. + for (; PI > 1 ; --PI) { + MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2)); + if (I == DomValue.end() || I->second != OVNI) + break; + I->second = VNI; + } + } + + // No need to search the children, we found a dominating value. + IDFI.skipChildren(); + } + + // The search should at least find a dominating value for IdxMBB. + assert(!DomValue.empty() && "Couldn't find a reaching definition"); + + // Since we went through the trouble of a full DFS visiting all reaching defs, + // the values in DomValue are now accurate. No more phi-defs are needed for + // these blocks, so we can color the live ranges. + // This makes the next mapValue call much faster. + VNInfo *IdxVNI = 0; + for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E; + ++I) { + MachineBasicBlock *MBB = I->first; + VNInfo *VNI = I->second; + SlotIndex Start = lis_.getMBBStartIdx(MBB); + if (MBB == IdxMBB) { + // Don't add full liveness to IdxMBB, stop at Idx. + if (Start != Idx) + li_.addRange(LiveRange(Start, Idx, VNI)); + // The caller had better add some liveness to IdxVNI, or it leaks. + IdxVNI = VNI; + } else + li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI)); + } + + assert(IdxVNI && "Didn't find value for Idx"); + return IdxVNI; +} + +// extendTo - Find the last li_ value defined in MBB at or before Idx. The +// parentli_ is assumed to be live at Idx. Extend the live range to Idx. +// Return the found VNInfo, or NULL. 
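
extendTo, defined next, relies on a classic search idiom: std::upper_bound returns the first element greater than the key, so stepping back one position yields the last element at or before it. In isolation, with plain ints standing in for live ranges and slot indexes:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> starts = {0, 8, 16, 32};  // sorted range starts
      const int idx = 20;
      std::vector<int>::iterator it =
          std::upper_bound(starts.begin(), starts.end(), idx);
      if (it != starts.begin())
        std::printf("last start <= %d is %d\n", idx, *--it);
      return 0;
    }
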
+VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
+  LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
+  if (I == li_.begin())
+    return 0;
+  --I;
+  if (I->start < lis_.getMBBStartIdx(MBB))
+    return 0;
+  if (I->end < Idx)
+    I->end = Idx;
+  return I->valno;
+}
+
+// addSimpleRange - Add a simple range from parentli_ to li_.
+// ParentVNI must be live in the [Start;End) interval.
+void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
+                                     const VNInfo *ParentVNI) {
+  VNInfo *VNI = mapValue(ParentVNI, Start);
+  // A simple mapping is easy.
+  if (VNI->def == ParentVNI->def) {
+    li_.addRange(LiveRange(Start, End, VNI));
+    return;
+  }
+
+  // ParentVNI is a complex value. We must map per MBB.
+  MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
+  MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+
+  if (MBB == MBBE) {
+    li_.addRange(LiveRange(Start, End, VNI));
+    return;
+  }
+
+  // First block.
+  li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+
+  // Run sequence of full blocks.
+  for (++MBB; MBB != MBBE; ++MBB) {
+    Start = lis_.getMBBStartIdx(MBB);
+    li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB),
+                           mapValue(ParentVNI, Start)));
+  }
+
+  // Final block.
+  Start = lis_.getMBBStartIdx(MBB);
+  if (Start != End)
+    li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
+}
+
+/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+/// All needed values whose def is not inside [Start;End) must be defined
+/// beforehand so mapValue will work.
+void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
+  LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end();
+  LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+  // Check if --I begins before Start and overlaps.
+  if (I != B) {
+    --I;
+    if (I->end > Start)
+      addSimpleRange(Start, std::min(End, I->end), I->valno);
+    ++I;
+  }
+
+  // The remaining ranges begin after Start.
+  for (;I != E && I->start < End; ++I)
+    addSimpleRange(I->start, std::min(End, I->end), I->valno);
+}
+
+//===----------------------------------------------------------------------===//
+//                               Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
+                         SmallVectorImpl<LiveInterval*> &intervals)
+  : sa_(sa), lis_(lis), vrm_(vrm),
+    mri_(vrm.getMachineFunction().getRegInfo()),
+    tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+    curli_(sa_.getCurLI()),
+    dupli_(0), openli_(0),
+    intervals_(intervals),
+    firstInterval(intervals_.size())
+{
+  assert(curli_ && "SplitEditor created from empty SplitAnalysis");
+
+  // Make sure curli_ is assigned a stack slot, so all our intervals get the
+  // same slot as curli_.
+  if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
+    vrm_.assignVirt2StackSlot(curli_->reg);
+
+}
+
+LiveInterval *SplitEditor::createInterval() {
+  unsigned curli = sa_.getCurLI()->reg;
+  unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
+  LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
+  vrm_.grow();
+  vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
+  return &Intv;
+}
+
+LiveInterval *SplitEditor::getDupLI() {
+  if (!dupli_) {
+    // Create an interval for dupli that is a copy of curli.
+    dupli_ = createInterval();
+    dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
+  }
+  return dupli_;
+}
+
+VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
+  VNInfo *&VNI = valueMap_[curliVNI];
+  if (!VNI)
+    VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
+  return VNI;
+}
+
+/// Insert a COPY instruction curli -> li. Allocate a new value from li
+/// defined by the COPY. Note that rewrite() will deal with the curli
+/// register, so this function can be used to copy from any interval - openli,
+/// curli, or dupli.
+VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
+                                MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) {
+  MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
+                             LI.reg).addReg(curli_->reg);
+  SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+  return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+}
+
+/// Create a new virtual register and live interval.
+void SplitEditor::openIntv() {
+  assert(!openli_ && "Previous LI not closed before openIntv");
+  openli_ = createInterval();
+  intervals_.push_back(openli_);
+  liveThrough_ = false;
+}
+
+/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
+/// not live before Idx, a COPY is not inserted.
+void SplitEditor::enterIntvBefore(SlotIndex Idx) {
+  assert(openli_ && "openIntv not called before enterIntvBefore");
+
+  // Copy from curli_ if it is live.
+  if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
+    MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
+    assert(MI && "enterIntvBefore called with invalid index");
+    VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
+    openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
+
+    // Make sure CurVNI is properly mapped.
+    VNInfo *&mapVNI = valueMap_[CurVNI];
+    // We don't have SSA update yet, so only one entry per value is allowed.
+    assert(!mapVNI && "enterIntvBefore called more than once for the same value");
+    mapVNI = VNI;
+  }
+  DEBUG(dbgs() << "    enterIntvBefore " << Idx << ": " << *openli_ << '\n');
+}
+
+/// enterIntvAtEnd - Enter openli at the end of MBB.
+/// PhiMBB is a successor inside openli where a PHI value is created.
+/// Currently, all entries must share the same PhiMBB.
+void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
+  assert(openli_ && "openIntv not called before enterIntvAtEnd");
+
+  SlotIndex EndA = lis_.getMBBEndIdx(&A);
+  VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
+  if (!CurVNIA) {
+    DEBUG(dbgs() << "    enterIntvAtEnd, curli not live out of BB#"
+                 << A.getNumber() << ".\n");
+    return;
+  }
+
+  // Add a phi kill value and live range out of A.
+  VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
+  openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
+
+  // FIXME: If this is the only entry edge, we don't need the extra PHI value.
+  // FIXME: If there are multiple entry blocks (so not a loop), we need proper
+  // SSA update.
+
+  // Now look at the start of B.
+  SlotIndex StartB = lis_.getMBBStartIdx(&B);
+  SlotIndex EndB = lis_.getMBBEndIdx(&B);
+  const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
+  if (!CurB) {
+    DEBUG(dbgs() << "    enterIntvAtEnd: curli not live in to BB#"
+                 << B.getNumber() << ".\n");
+    return;
+  }
+
+  VNInfo *VNIB = openli_->getVNInfoAt(StartB);
+  if (!VNIB) {
+    // Create a phi value.
+    VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
+                                 lis_.getVNInfoAllocator());
+    VNIB->setIsPHIDef(true);
+    VNInfo *&mapVNI = valueMap_[CurB->valno];
+    if (mapVNI) {
+      // Multiple copies - must create PHI value.
+      abort();
+    } else {
+      // This is the first copy of dupLR. Mark the mapping.
+      mapVNI = VNIB;
+    }
+
+  }
+
+  DEBUG(dbgs() << "    enterIntvAtEnd: " << *openli_ << '\n');
+}
+
+/// useIntv - indicate that all instructions in MBB should use openli.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+  useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+  assert(openli_ && "openIntv not called before useIntv");
+
+  // Map the curli values from the interval into openli_
+  LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
+  LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+  if (I != B) {
+    --I;
+    // I begins before Start, but overlaps.
+    if (I->end > Start)
+      openli_->addRange(LiveRange(Start, std::min(End, I->end),
+                        mapValue(I->valno)));
+    ++I;
+  }
+
+  // The remaining ranges begin after Start.
+  for (;I != E && I->start < End; ++I)
+    openli_->addRange(LiveRange(I->start, std::min(End, I->end),
+                      mapValue(I->valno)));
+  DEBUG(dbgs() << "    use [" << Start << ';' << End << "): " << *openli_
+               << '\n');
+}
+
+/// leaveIntvAfter - Leave openli after the instruction at Idx.
+void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+  assert(openli_ && "openIntv not called before leaveIntvAfter");
+
+  const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
+  if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
+    DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": not live\n");
+    return;
+  }
+
+  // Was this value of curli live through openli?
+  if (!openli_->liveAt(CurLR->valno->def)) {
+    DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": using external value\n");
+    liveThrough_ = true;
+    return;
+  }
+
+  // We are going to insert a back copy, so we must have a dupli_.
+  LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
+  assert(DupLR && "dupli not live into block, but curli is?");
+
+  // Insert the COPY instruction.
+  MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
+  MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
+                             tii_.get(TargetOpcode::COPY), dupli_->reg)
+                       .addReg(openli_->reg);
+  SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+  openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
+                    mapValue(CurLR->valno)));
+  DupLR->valno->def = CopyIdx;
+  DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+}
+
+/// leaveIntvAtTop - Leave the interval at the top of MBB.
+/// Currently, only one value can leave the interval.
+void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+  assert(openli_ && "openIntv not called before leaveIntvAtTop");
+
+  SlotIndex Start = lis_.getMBBStartIdx(&MBB);
+  const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
+
+  // Is curli even live-in to MBB?
+  if (!CurLR) {
+    DEBUG(dbgs() << "    leaveIntvAtTop at " << Start << ": not live\n");
+    return;
+  }
+
+  // Is curli defined by PHI at the beginning of MBB?
+  bool isPHIDef = CurLR->valno->isPHIDef() &&
+                  CurLR->valno->def.getBaseIndex() == Start;
+
+  // If MBB is using a value of curli that was defined outside the openli range,
+  // we don't want to copy it back here.
+  if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
+    DEBUG(dbgs() << "    leaveIntvAtTop at " << Start
+                 << ": using external value\n");
+    liveThrough_ = true;
+    return;
+  }
+
+  // We are going to insert a back copy, so we must have a dupli_.
+  LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
+  assert(DupLR && "dupli not live into block, but curli is?");
+
+  // Insert the COPY instruction.
+  MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
+                             tii_.get(TargetOpcode::COPY), dupli_->reg)
+                       .addReg(openli_->reg);
+  SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+
+  // Adjust dupli and openli values.
+  if (isPHIDef) {
+    // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
+    // and shift the dupli def down to the COPY.
+    VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
+                                        lis_.getVNInfoAllocator());
+    VNI->setIsPHIDef(true);
+    openli_->addRange(LiveRange(VNI->def, Idx, VNI));
+
+    dupli_->removeRange(Start, Idx);
+    DupLR->valno->def = Idx;
+    DupLR->valno->setIsPHIDef(false);
+  } else {
+    // The dupli value was defined somewhere inside the openli range.
+    DEBUG(dbgs() << "    leaveIntvAtTop source value defined at "
+                 << DupLR->valno->def << "\n");
+    // FIXME: We may not need a PHI here if all predecessors have the same
+    // value.
+    VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
+                                        lis_.getVNInfoAllocator());
+    VNI->setIsPHIDef(true);
+    openli_->addRange(LiveRange(VNI->def, Idx, VNI));
+
+    // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
+
+    // closeIntv is going to remove the superfluous live ranges.
+    DupLR->valno->def = Idx;
+    DupLR->valno->setIsPHIDef(false);
+  }
+
+  DEBUG(dbgs() << "    leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+}
+
+/// closeIntv - Indicate that we are done editing the currently open
+/// LiveInterval, and ranges can be trimmed.
+void SplitEditor::closeIntv() {
+  assert(openli_ && "openIntv not called before closeIntv");
+
+  DEBUG(dbgs() << "    closeIntv cleaning up\n");
+  DEBUG(dbgs() << "    open " << *openli_ << '\n');
+
+  if (liveThrough_) {
+    DEBUG(dbgs() << "    value live through region, leaving dupli as is.\n");
+  } else {
+    // Live out with copies inserted, or killed by region. Either way we need
+    // to remove the overlapping region from dupli.
+    getDupLI();
+    for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
+         I != E; ++I) {
+      dupli_->removeRange(I->start, I->end);
+    }
+    // FIXME: A block branching to the entry block may also branch elsewhere
+    // where curli is live. We need both openli and curli to be live in that
+    // case.
+    DEBUG(dbgs() << "    dup2 " << *dupli_ << '\n');
+  }
+  openli_ = 0;
+  valueMap_.clear();
+}
+
+/// rewrite - after all the new live ranges have been created, rewrite
+/// instructions using curli to use the new intervals.
+void SplitEditor::rewrite() {
+  assert(!openli_ && "Previous LI not closed before rewrite");
+  const LiveInterval *curli = sa_.getCurLI();
+  for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
+       RE = mri_.reg_end(); RI != RE;) {
+    MachineOperand &MO = RI.getOperand();
+    MachineInstr *MI = MO.getParent();
+    ++RI;
+    if (MI->isDebugValue()) {
+      DEBUG(dbgs() << "Zapping " << *MI);
+      // FIXME: We can do much better with debug values.
+      MO.setReg(0);
+      continue;
+    }
+    SlotIndex Idx = lis_.getInstructionIndex(MI);
+    Idx = MO.isUse() ?
Idx.getUseIndex() : Idx.getDefIndex(); + LiveInterval *LI = dupli_; + for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { + LiveInterval *testli = intervals_[i]; + if (testli->liveAt(Idx)) { + LI = testli; + break; + } + } + if (LI) { + MO.setReg(LI->reg); + sa_.removeUse(MI); + DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI); + } + } + + // dupli_ goes in last, after rewriting. + if (dupli_) { + if (dupli_->empty()) { + DEBUG(dbgs() << " dupli became empty?\n"); + lis_.removeInterval(dupli_->reg); + dupli_ = 0; + } else { + dupli_->RenumberValues(lis_); + intervals_.push_back(dupli_); + } + } + + // Calculate spill weight and allocation hints for new intervals. + VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_); + for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { + LiveInterval &li = *intervals_[i]; + vrai.CalculateRegClass(li.reg); + vrai.CalculateWeightAndHint(li); + DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName() + << ":" << li << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Loop Splitting +//===----------------------------------------------------------------------===// + +bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { + SplitAnalysis::LoopBlocks Blocks; + sa_.getLoopBlocks(Loop, Blocks); + + // Break critical edges as needed. + SplitAnalysis::BlockPtrSet CriticalExits; + sa_.getCriticalExits(Blocks, CriticalExits); + assert(CriticalExits.empty() && "Cannot break critical exits yet"); + + // Create new live interval for the loop. + openIntv(); + + // Insert copies in the predecessors. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(), + E = Blocks.Preds.end(); I != E; ++I) { + MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); + enterIntvAtEnd(MBB, *Loop->getHeader()); + } + + // Switch all loop blocks. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(), + E = Blocks.Loop.end(); I != E; ++I) + useIntv(**I); + + // Insert back copies in the exit blocks. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(), + E = Blocks.Exits.end(); I != E; ++I) { + MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); + leaveIntvAtTop(MBB); + } + + // Done. + closeIntv(); + rewrite(); + return dupli_; +} + + +//===----------------------------------------------------------------------===// +// Single Block Splitting +//===----------------------------------------------------------------------===// + +/// splitSingleBlocks - Split curli into a separate live interval inside each +/// basic block in Blocks. Return true if curli has been completely replaced, +/// false if curli is still intact, and needs to be spilled or split further. +bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { + DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); + // Determine the first and last instruction using curli in each block. 
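
The bookkeeping announced in that comment is a per-block min/max fold over use indexes. The same computation on plain maps, with made-up block numbers and indexes:

    #include <algorithm>
    #include <cstdio>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      // (block, use index) pairs, in arbitrary order; all numbers invented.
      std::vector<std::pair<int, int>> uses = {{1, 12}, {1, 4}, {2, 30}, {1, 20}};
      std::map<int, std::pair<int, int>> range;  // block -> [first, last]
      for (const auto &u : uses) {
        auto it = range.find(u.first);
        if (it == range.end())
          range[u.first] = std::make_pair(u.second, u.second);
        else {
          it->second.first = std::min(it->second.first, u.second);
          it->second.second = std::max(it->second.second, u.second);
        }
      }
      for (const auto &r : range)
        std::printf("block %d: [%d;%d]\n", r.first, r.second.first,
                    r.second.second);
      return 0;
    }
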
+  typedef std::pair<SlotIndex,SlotIndex> IndexPair;
+  typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
+  IndexPairMap MBBRange;
+  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+       E = sa_.usingInstrs_.end(); I != E; ++I) {
+    const MachineBasicBlock *MBB = (*I)->getParent();
+    if (!Blocks.count(MBB))
+      continue;
+    SlotIndex Idx = lis_.getInstructionIndex(*I);
+    DEBUG(dbgs() << "  BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
+    IndexPair &IP = MBBRange[MBB];
+    if (!IP.first.isValid() || Idx < IP.first)
+      IP.first = Idx;
+    if (!IP.second.isValid() || Idx > IP.second)
+      IP.second = Idx;
+  }
+
+  // Create a new interval for each block.
+  for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
+       E = Blocks.end(); I != E; ++I) {
+    IndexPair &IP = MBBRange[*I];
+    DEBUG(dbgs() << "  splitting for BB#" << (*I)->getNumber() << ": ["
+                 << IP.first << ';' << IP.second << ")\n");
+    assert(IP.first.isValid() && IP.second.isValid());
+
+    openIntv();
+    enterIntvBefore(IP.first);
+    useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
+    leaveIntvAfter(IP.second);
+    closeIntv();
+  }
+  rewrite();
+  return dupli_;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Sub Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// and it would pay to subdivide the interval inside that block, return it.
+/// Otherwise return NULL. The returned block can be passed to
+/// SplitEditor::splitInsideBlock.
+const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
+  // The interval must be exclusive to one block.
+  if (usingBlocks_.size() != 1)
+    return 0;
+  // Don't do this for less than 4 instructions. We want to be sure that
+  // splitting actually reduces the instruction count per interval.
+  if (usingInstrs_.size() < 4)
+    return 0;
+  return usingBlocks_.begin()->first;
+}
+
+/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+/// true if curli has been completely replaced, false if curli is still
+/// intact, and needs to be spilled or split further.
+bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+  SmallVector<SlotIndex, 32> Uses;
+  Uses.reserve(sa_.usingInstrs_.size());
+  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+       E = sa_.usingInstrs_.end(); I != E; ++I)
+    if ((*I)->getParent() == MBB)
+      Uses.push_back(lis_.getInstructionIndex(*I));
+  DEBUG(dbgs() << "  splitInsideBlock BB#" << MBB->getNumber() << " for "
+               << Uses.size() << " instructions.\n");
+  assert(Uses.size() >= 3 && "Need at least 3 instructions");
+  array_pod_sort(Uses.begin(), Uses.end());
+
+  // Simple algorithm: Find the largest gap between uses as determined by slot
+  // indices. Create new intervals for instructions before the gap and after
+  // the gap.
+  unsigned bestPos = 0;
+  int bestGap = 0;
+  DEBUG(dbgs() << "  dist (" << Uses[0]);
+  for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+    int g = Uses[i-1].distance(Uses[i]);
+    DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
+    if (g > bestGap)
+      bestPos = i, bestGap = g;
+  }
+  DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
+
+  // bestPos points to the first use after the best gap.
+  assert(bestPos > 0 && "Invalid gap");
+
+  // FIXME: Don't create intervals for low densities.
+
+  // First interval before the gap. Don't create single-instr intervals.
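
The largest-gap scan above, as a standalone sketch with plain ints standing in for SlotIndexes; bestPos ends up pointing at the first use after the widest gap:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> uses = {4, 8, 10, 40, 44};  // sorted use positions
      unsigned bestPos = 0;
      int bestGap = 0;
      for (unsigned i = 1; i < uses.size(); ++i) {
        int g = uses[i] - uses[i - 1];
        if (g > bestGap) {
          bestPos = i;                             // first use after the gap
          bestGap = g;
        }
      }
      std::printf("split around the gap before use #%u (width %d)\n",
                  bestPos, bestGap);
      return 0;
    }
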
+  if (bestPos > 1) {
+    openIntv();
+    enterIntvBefore(Uses.front());
+    useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
+    leaveIntvAfter(Uses[bestPos-1]);
+    closeIntv();
+  }
+
+  // Second interval after the gap.
+  if (bestPos < Uses.size()-1) {
+    openIntv();
+    enterIntvBefore(Uses[bestPos]);
+    useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
+    leaveIntvAfter(Uses.back());
+    closeIntv();
+  }
+
+  rewrite();
+  return dupli_;
+}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 0000000..ddef746
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,321 @@
+//===----------- SplitKit.h - Toolkit for splitting live ranges -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class VirtRegMap;
+class VNInfo;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+  const MachineFunction &mf_;
+  const LiveIntervals &lis_;
+  const MachineLoopInfo &loops_;
+  const TargetInstrInfo &tii_;
+
+  // Instructions using the current register.
+  typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
+  InstrPtrSet usingInstrs_;
+
+  // The number of instructions using curli in each basic block.
+  typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
+  BlockCountMap usingBlocks_;
+
+  // The number of basic blocks using curli in each loop.
+  typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
+  LoopCountMap usingLoops_;
+
+private:
+  // Current live interval.
+  const LiveInterval *curli_;
+
+  // Summarize statistics by counting instructions using curli_.
+  void analyzeUses();
+
+  /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
+  /// analyzed.
+  bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+
+public:
+  SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+                const MachineLoopInfo &mli);
+
+  /// analyze - set curli to the specified interval, and analyze how it may be
+  /// split.
+  void analyze(const LiveInterval *li);
+
+  /// removeUse - Update statistics by noting that mi no longer uses curli.
+  void removeUse(const MachineInstr *mi);
+
+  const LiveInterval *getCurLI() { return curli_; }
+
+  /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+  /// new interval.
+  void clear();
+
+  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+  typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
+
+  // Sets of basic blocks surrounding a machine loop.
+  struct LoopBlocks {
+    BlockPtrSet Loop;  // Blocks in the loop.
+    BlockPtrSet Preds; // Loop predecessor blocks.
+    BlockPtrSet Exits; // Loop exit blocks.
+
+    void clear() {
+      Loop.clear();
+      Preds.clear();
+      Exits.clear();
+    }
+  };
+
+  // Calculate the block sets surrounding the loop.
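  // Illustration (hypothetical CFG, not from the patch): for a natural loop
  //   entry -> header; header -> latch; latch -> header; header -> exit
  // getLoopBlocks() below yields Loop = {header, latch}, Preds = {entry},
  // and Exits = {exit}. splitAroundLoop() in SplitKit.cpp enters the new
  // interval in Preds, uses it across Loop, and leaves it again in Exits.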
+  void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
+
+  /// LoopPeripheralUse - how is a variable used in and around a loop?
+  /// Peripheral blocks are the loop predecessors and exit blocks.
+  enum LoopPeripheralUse {
+    ContainedInLoop,  // All uses are inside the loop.
+    SinglePeripheral, // At most one instruction per peripheral block.
+    MultiPeripheral,  // Multiple instructions in some peripheral blocks.
+    OutsideLoop       // Uses outside loop periphery.
+  };
+
+  /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+  /// and around the Loop.
+  LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+
+  /// getCriticalExits - It may be necessary to partially break critical edges
+  /// leaving the loop if an exit block has phi uses of curli. Collect the exit
+  /// blocks that need special treatment into CriticalExits.
+  void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+
+  /// canSplitCriticalExits - Return true if it is possible to insert new exit
+  /// blocks before the blocks in CriticalExits.
+  bool canSplitCriticalExits(const LoopBlocks &Blocks,
+                             BlockPtrSet &CriticalExits);
+
+  /// getBestSplitLoop - Return the loop where curli may best be split to a
+  /// separate register, or NULL.
+  const MachineLoop *getBestSplitLoop();
+
+  /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+  /// having curli split to a new live interval. Return true if Blocks can be
+  /// passed to SplitEditor::splitSingleBlocks.
+  bool getMultiUseBlocks(BlockPtrSet &Blocks);
+
+  /// getBlockForInsideSplit - If curli is contained inside a single basic
+  /// block, and it would pay to subdivide the interval inside that block,
+  /// return it. Otherwise return NULL. The returned block can be passed to
+  /// SplitEditor::splitInsideBlock.
+  const MachineBasicBlock *getBlockForInsideSplit();
+};
+
+
+/// LiveIntervalMap - Map values from a large LiveInterval into a small
+/// interval that is a subset. Insert phi-def values as needed. This class is
+/// used by SplitEditor to create new smaller LiveIntervals.
+///
+/// parentli_ is the larger interval, li_ is the subset interval. Every value
+/// in li_ corresponds to exactly one value in parentli_, and the live range
+/// of the value is contained within the live range of the parentli_ value.
+/// Values in parentli_ may map to any number of openli_ values, including 0.
+class LiveIntervalMap {
+  LiveIntervals &lis_;
+
+  // The parent interval is never changed.
+  const LiveInterval &parentli_;
+
+  // The child interval's values are fully contained inside parentli_ values.
+  LiveInterval &li_;
+
+  typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+
+  // Map parentli_ values to simple values in li_ that are defined at the same
+  // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+  // Note there is a difference between values mapping to NULL (complex), and
+  // values not present (unknown/unmapped).
+  ValueMap valueMap_;
+
+  // extendTo - Find the last li_ value defined in MBB at or before Idx. The
+  // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+  // Return the found VNInfo, or NULL.
+  VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
+
+  // addSimpleRange - Add a simple range from parentli_ to li_.
+  // ParentVNI must be live in the [Start;End) interval.
+  void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+
+public:
+  LiveIntervalMap(LiveIntervals &lis,
+                  const LiveInterval &parentli,
+                  LiveInterval &li)
+    : lis_(lis), parentli_(parentli), li_(li) {}
+
+  /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+  /// Idx does not have to be ParentVNI->def, but it must be contained within
+  /// ParentVNI's live range in parentli_.
+  /// Return the new li_ value.
+  VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+  /// assumed that ParentVNI is live at Idx.
+  /// If ParentVNI has not been defined by defValue, it is assumed that
+  /// ParentVNI->def dominates Idx.
+  /// If ParentVNI has been defined by defValue one or more times, a value that
+  /// dominates Idx will be returned. This may require creating extra phi-def
+  /// values and adding live ranges to li_.
+  VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+  /// All needed values whose def is not inside [Start;End) must be defined
+  /// beforehand so mapValue will work.
+  void addRange(SlotIndex Start, SlotIndex End);
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Finish the current interval with closeIntv and repeat from openIntv.
+/// - Rewrite instructions with rewrite().
+///
+class SplitEditor {
+  SplitAnalysis &sa_;
+  LiveIntervals &lis_;
+  VirtRegMap &vrm_;
+  MachineRegisterInfo &mri_;
+  const TargetInstrInfo &tii_;
+
+  /// curli_ - The immutable interval we are currently splitting.
+  const LiveInterval *const curli_;
+
+  /// dupli_ - Created as a copy of curli_, ranges are carved out as new
+  /// intervals get added through openIntv / closeIntv. This is used to avoid
+  /// editing curli_.
+  LiveInterval *dupli_;
+
+  /// Currently open LiveInterval.
+  LiveInterval *openli_;
+
+  /// createInterval - Create a new virtual register and LiveInterval with same
+  /// register class and spill slot as curli.
+  LiveInterval *createInterval();
+
+  /// getDupLI - Ensure dupli is created and return it.
+  LiveInterval *getDupLI();
+
+  /// valueMap_ - Map values in curli to values in openli. These are direct 1-1
+  /// mappings, and do not include values created by inserted copies.
+  DenseMap<const VNInfo*, VNInfo*> valueMap_;
+
+  /// mapValue - Return the openIntv value that corresponds to the given curli
+  /// value.
+  VNInfo *mapValue(const VNInfo *curliVNI);
+
+  /// A dupli value is live through openIntv.
+  bool liveThrough_;
+
+  /// All the new intervals created for this split are added to intervals_.
+  SmallVectorImpl<LiveInterval*> &intervals_;
+
+  /// The index into intervals_ of the first interval we added. There may be
+  /// others from before we got it.
+  unsigned firstInterval;
+
+  /// Insert a COPY instruction curli -> li. Allocate a new value from li
+  /// defined by the COPY.
+  VNInfo *insertCopy(LiveInterval &LI,
+                     MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator I);
+
+public:
+  /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals. + SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, + SmallVectorImpl<LiveInterval*> &newIntervals); + + /// getAnalysis - Get the corresponding analysis. + SplitAnalysis &getAnalysis() { return sa_; } + + /// Create a new virtual register and live interval. + void openIntv(); + + /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is + /// not live before Idx, a COPY is not inserted. + void enterIntvBefore(SlotIndex Idx); + + /// enterIntvAtEnd - Enter openli at the end of MBB. + /// PhiMBB is a successor inside openli where a PHI value is created. + /// Currently, all entries must share the same PhiMBB. + void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB); + + /// useIntv - indicate that all instructions in MBB should use openli. + void useIntv(const MachineBasicBlock &MBB); + + /// useIntv - indicate that all instructions in range should use openli. + void useIntv(SlotIndex Start, SlotIndex End); + + /// leaveIntvAfter - Leave openli after the instruction at Idx. + void leaveIntvAfter(SlotIndex Idx); + + /// leaveIntvAtTop - Leave the interval at the top of MBB. + /// Currently, only one value can leave the interval. + void leaveIntvAtTop(MachineBasicBlock &MBB); + + /// closeIntv - Indicate that we are done editing the currently open + /// LiveInterval, and ranges can be trimmed. + void closeIntv(); + + /// rewrite - after all the new live ranges have been created, rewrite + /// instructions using curli to use the new intervals. + void rewrite(); + + // ===--- High level methods ---=== + + /// splitAroundLoop - Split curli into a separate live interval inside + /// the loop. Return true if curli has been completely replaced, false if + /// curli is still intact, and needs to be spilled or split further. + bool splitAroundLoop(const MachineLoop*); + + /// splitSingleBlocks - Split curli into a separate live interval inside each + /// basic block in Blocks. Return true if curli has been completely replaced, + /// false if curli is still intact, and needs to be spilled or split further. + bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + + /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return + /// true if curli has been completely replaced, false if curli is still + /// intact, and needs to be spilled or split further. + bool splitInsideBlock(const MachineBasicBlock *); +}; + +} diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp new file mode 100644 index 0000000..38f3b1f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Splitter.cpp @@ -0,0 +1,817 @@ +//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
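The high-level splitting methods declared above are meant to be tried from coarsest to finest by whatever spiller or allocator owns the SplitEditor. A minimal, hypothetical driver, sketched here for orientation (tryToSplit and its ordering policy are not part of this patch; it only uses the entry points declared above, and assumes SA.analyze(li) has already run):

    // Hypothetical driver: try progressively finer splits before giving up.
    static bool tryToSplit(llvm::SplitAnalysis &SA, llvm::SplitEditor &Editor) {
      if (const llvm::MachineLoop *Loop = SA.getBestSplitLoop())
        return Editor.splitAroundLoop(Loop);        // Split around one loop.
      llvm::SplitAnalysis::BlockPtrSet Blocks;
      if (SA.getMultiUseBlocks(Blocks))
        return Editor.splitSingleBlocks(Blocks);    // One interval per block.
      if (const llvm::MachineBasicBlock *MBB = SA.getBlockForInsideSplit())
        return Editor.splitInsideBlock(MBB);        // Split within one block.
      return false;                                 // Caller must spill curli.
    }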
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "SimpleRegisterCoalescing.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS(LoopSplitter, "loop-splitting",
+                "Split virtual registers across loop boundaries.", false, false);
+
+namespace llvm {
+
+  class StartSlotComparator {
+  public:
+    StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+    bool operator()(const MachineBasicBlock *mbb1,
+                    const MachineBasicBlock *mbb2) const {
+      return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+    }
+  private:
+    LiveIntervals &lis;
+  };
+
+  class LoopSplit {
+  public:
+    LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+      : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+      assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+             "Cannot split physical registers.");
+    }
+
+    LiveInterval& getLI() const { return li; }
+
+    MachineLoop& getLoop() const { return loop; }
+
+    bool isValid() const { return valid; }
+
+    bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+    void invalidate() { valid = false; }
+
+    void splitIncoming() { inSplit = true; }
+
+    void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+    void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+    void apply() {
+      assert(valid && "Attempt to apply invalid split.");
+      applyIncoming();
+      applyOutgoing();
+      copyRanges();
+      renameInside();
+    }
+
+  private:
+    LoopSplitter &ls;
+    LiveInterval &li;
+    MachineLoop &loop;
+    bool valid, inSplit;
+    std::set<MachineLoop::Edge> outSplits;
+    std::vector<MachineInstr*> loopInstrs;
+
+    LiveInterval *newLI;
+    std::map<VNInfo*, VNInfo*> vniMap;
+
+    LiveInterval* getNewLI() {
+      if (newLI == 0) {
+        const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+        unsigned vreg = ls.mri->createVirtualRegister(trc);
+        newLI = &ls.lis->getOrCreateInterval(vreg);
+      }
+      return newLI;
+    }
+
+    VNInfo* getNewVNI(VNInfo *oldVNI) {
+      VNInfo *newVNI = vniMap[oldVNI];
+
+      if (newVNI == 0) {
+        newVNI = getNewLI()->createValueCopy(oldVNI,
+                                             ls.lis->getVNInfoAllocator());
+        vniMap[oldVNI] = newVNI;
+      }
+
+      return newVNI;
+    }
+
+    void applyIncoming() {
+      if (!inSplit) {
+        return;
+      }
+
+      MachineBasicBlock *preHeader = loop.getLoopPreheader();
+      if (preHeader == 0) {
+        assert(ls.canInsertPreHeader(loop) &&
+               "Can't insert required preheader.");
+        preHeader = &ls.insertPreHeader(loop);
+      }
+
+      LiveRange *preHeaderRange =
+        ls.lis->findExitingRange(li, preHeader);
+      assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+      // Insert the new copy.
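      // Note on the code below (explanatory, not from the patch): BuildMI
      // materializes a target-independent copy, roughly
      //   %newLI.reg<def> = COPY %li.reg<kill>
      // at the preheader's first terminator, so the loop-local register
      // receives the value on every entry into the loop.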
+ MachineInstr *copy = BuildMI(*preHeader, + preHeader->getFirstTerminator(), + DebugLoc(), + ls.tii->get(TargetOpcode::COPY)) + .addReg(getNewLI()->reg, RegState::Define) + .addReg(li.reg, RegState::Kill); + + ls.lis->InsertMachineInstrInMaps(copy); + + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + + VNInfo *newVal = getNewVNI(preHeaderRange->valno); + newVal->def = copyDefIdx; + newVal->setCopy(copy); + newVal->setIsDefAccurate(true); + li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); + + getNewLI()->addRange(LiveRange(copyDefIdx, + ls.lis->getMBBEndIdx(preHeader), + newVal)); + } + + void applyOutgoing() { + + for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(), + osEnd = outSplits.end(); + osItr != osEnd; ++osItr) { + MachineLoop::Edge edge = *osItr; + MachineBasicBlock *outBlock = edge.second; + if (ls.isCriticalEdge(edge)) { + assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); + outBlock = &ls.splitEdge(edge, loop); + } + LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); + assert(outRange != 0 && "No exiting range?"); + + MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), + DebugLoc(), + ls.tii->get(TargetOpcode::COPY)) + .addReg(li.reg, RegState::Define) + .addReg(getNewLI()->reg, RegState::Kill); + + ls.lis->InsertMachineInstrInMaps(copy); + + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + + // Blow away output range definition. + outRange->valno->def = ls.lis->getInvalidIndex(); + outRange->valno->setIsDefAccurate(false); + li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); + + VNInfo *newVal = + getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock), + true), + 0, false, ls.lis->getVNInfoAllocator()); + + getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), + copyDefIdx, newVal)); + + } + } + + void copyRange(LiveRange &lr) { + std::pair<bool, LoopSplitter::SlotPair> lsr = + ls.getLoopSubRange(lr, loop); + + if (!lsr.first) + return; + + LiveRange loopRange(lsr.second.first, lsr.second.second, + getNewVNI(lr.valno)); + + li.removeRange(loopRange.start, loopRange.end, true); + + getNewLI()->addRange(loopRange); + } + + void copyRanges() { + for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); + if (instr.modifiesRegister(li.reg, 0)) { + LiveRange *defRange = + li.getLiveRangeContaining(instrIdx.getDefIndex()); + if (defRange != 0) // May have caught this already. + copyRange(*defRange); + } + if (instr.readsRegister(li.reg, 0)) { + LiveRange *useRange = + li.getLiveRangeContaining(instrIdx.getUseIndex()); + if (useRange != 0) { // May have caught this already. 
+ copyRange(*useRange); + } + } + } + + for (MachineLoop::block_iterator bbItr = loop.block_begin(), + bbEnd = loop.block_end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock &loopBlock = **bbItr; + LiveRange *enteringRange = + ls.lis->findEnteringRange(li, &loopBlock); + if (enteringRange != 0) { + copyRange(*enteringRange); + } + } + } + + void renameInside() { + for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + for (unsigned i = 0; i < instr.getNumOperands(); ++i) { + MachineOperand &mop = instr.getOperand(i); + if (mop.isReg() && mop.getReg() == li.reg) { + mop.setReg(getNewLI()->reg); + } + } + } + } + + }; + + void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + au.addPreserved<RegisterCoalescer>(); + au.addPreserved<CalculateSpillWeights>(); + au.addPreserved<LiveStacks>(); + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); + sis = &getAnalysis<SlotIndexes>(); + lis = &getAnalysis<LiveIntervals>(); + mli = &getAnalysis<MachineLoopInfo>(); + mdt = &getAnalysis<MachineDominatorTree>(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + + mf->getFunction()->getName().str(); + + dbgs() << "Splitting " << mf->getFunction()->getName() << "."; + + dumpOddTerminators(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + +// std::deque<MachineLoop*> loops; +// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); +// dbgs() << "Loops:\n"; +// while (!loops.empty()) { +// MachineLoop &loop = *loops.front(); +// loops.pop_front(); +// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + +// dumpLoopInfo(loop); +// } + + //lis->dump(); + //exit(0); + + // Setup initial intervals. 
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (TargetRegisterInfo::isVirtualRegister(li->reg) && + !lis->intervalIsInOneMBB(*li)) { + intervals.push_back(li); + } + } + + processIntervals(); + + intervals.clear(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + + dumpOddTerminators(); + + //exit(1); + + return false; + } + + void LoopSplitter::releaseMemory() { + fqn.clear(); + intervals.clear(); + loopRangeMap.clear(); + } + + void LoopSplitter::dumpOddTerminators() { + for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock *mbb = &*bbItr; + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + if (tii->AnalyzeBranch(*mbb, a, b, c)) { + dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; + dbgs() << " Terminators:\n"; + for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); + iItr != iEnd; ++iItr) { + MachineInstr *instr= &*iItr; + dbgs() << " " << *instr << ""; + } + dbgs() << "\n Listed successors: [ "; + for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); + sItr != sEnd; ++sItr) { + MachineBasicBlock *succMBB = *sItr; + dbgs() << succMBB->getNumber() << " "; + } + dbgs() << "]\n\n"; + } + } + } + + void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { + MachineBasicBlock &headerBlock = *loop.getHeader(); + typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; + ExitEdgesList exitEdges; + loop.getExitEdges(exitEdges); + + dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; + for (std::vector<MachineBasicBlock*>::const_iterator + subBlockItr = loop.getBlocks().begin(), + subBlockEnd = loop.getBlocks().end(); + subBlockItr != subBlockEnd; ++subBlockItr) { + MachineBasicBlock &subBlock = **subBlockItr; + dbgs() << "BB#" << subBlock.getNumber() << " "; + } + dbgs() << "], Exit edges: [ "; + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge &exitEdge = *exitEdgeItr; + dbgs() << "(MBB#" << exitEdge.first->getNumber() + << ", MBB#" << exitEdge.second->getNumber() << ") "; + } + dbgs() << "], Sub-Loop Headers: [ "; + for (MachineLoop::iterator subLoopItr = loop.begin(), + subLoopEnd = loop.end(); + subLoopItr != subLoopEnd; ++subLoopItr) { + MachineLoop &subLoop = **subLoopItr; + MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); + dbgs() << "BB#" << subLoopBlock.getNumber() << " "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { + mbb.updateTerminator(); + + for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); + miItr != miEnd; ++miItr) { + if (lis->isNotInMIMap(miItr)) { + lis->InsertMachineInstrInMaps(miItr); + } + } + } + + bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { + MachineBasicBlock *header = loop.getHeader(); + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + + for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), + pbEnd = header->pred_end(); + pbItr != pbEnd; ++pbItr) { + MachineBasicBlock *predBlock = *pbItr; + if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { + return false; + } + } + + MachineFunction::iterator headerItr(header); + if (headerItr == mf->begin()) + return true; + 
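    // Explanatory note (inferred from the checks here, not stated by the
    // patch): the layout predecessor matters because inserting a preheader
    // between it and the header would break an implicit fall-through, so
    // its branch must be analyzable for updateTerminators() to rewrite it.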
MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); + assert(headerLayoutPred != 0 && "Header should have layout pred."); + + return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { + assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); + + MachineBasicBlock &header = *loop.getHeader(); + + // Save the preds - we'll need to update them once we insert the preheader. + typedef std::set<MachineBasicBlock*> HeaderPreds; + HeaderPreds headerPreds; + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (!loop.contains(*predItr)) + headerPreds.insert(*predItr); + } + + assert(!headerPreds.empty() && "No predecessors for header?"); + + //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; + + MachineBasicBlock *preHeader = + mf->CreateMachineBasicBlock(header.getBasicBlock()); + + assert(preHeader != 0 && "Failed to create pre-header."); + + mf->insert(header, preHeader); + + for (HeaderPreds::iterator hpItr = headerPreds.begin(), + hpEnd = headerPreds.end(); + hpItr != hpEnd; ++hpItr) { + assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); + MachineBasicBlock &hp = **hpItr; + hp.ReplaceUsesOfBlockWith(&header, preHeader); + } + preHeader->addSuccessor(&header); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(preHeader)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(preHeader); + + if (MachineLoop *parentLoop = loop.getParentLoop()) { + assert(parentLoop->getHeader() != loop.getHeader() && + "Parent loop has same header?"); + parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); + + // Invalidate all parent loop ranges. + while (parentLoop != 0) { + loopRangeMap.erase(parentLoop); + parentLoop = parentLoop->getParentLoop(); + } + } + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + + // Is this safe for physregs? + // TargetRegisterInfo::isPhysicalRegister(li.reg) || + if (!lis->isLiveInToMBB(li, &header)) + continue; + + if (lis->isLiveInToMBB(li, preHeader)) { + assert(lis->isLiveOutOfMBB(li, preHeader) && + "Range terminates in newly added preheader?"); + continue; + } + + bool insertRange = false; + + for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), + predEnd = preHeader->pred_end(); + predItr != predEnd; ++predItr) { + MachineBasicBlock *predMBB = *predItr; + if (lis->isLiveOutOfMBB(li, predMBB)) { + insertRange = true; + break; + } + } + + if (!insertRange) + continue; + + VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader), + 0, false, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), + lis->getMBBEndIdx(preHeader), + newVal)); + } + + + //dbgs() << "Dumping SlotIndexes:\n"; + //sis->dump(); + + //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; + + return *preHeader; + } + + bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { + assert(edge.first->succ_size() > 1 && "Non-sensical edge."); + if (edge.second->pred_size() > 1) + return true; + return false; + } + + bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { + MachineFunction::iterator outBlockItr(edge.second); + if (outBlockItr == mf->begin()) + return true; + MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); + assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && + !tii->AnalyzeBranch(*edge.first, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, + MachineLoop &loop) { + + MachineBasicBlock &inBlock = *edge.first; + MachineBasicBlock &outBlock = *edge.second; + + assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && + "Splitting non-critical edge?"); + + //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() + // << " -> MBB#" << outBlock.getNumber() << ")..."; + + MachineBasicBlock *splitBlock = + mf->CreateMachineBasicBlock(); + + assert(splitBlock != 0 && "Failed to create split block."); + + mf->insert(&outBlock, splitBlock); + + inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); + splitBlock->addSuccessor(&outBlock); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(splitBlock)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(splitBlock); + + loopRangeMap.erase(&loop); + + MachineLoop *splitParentLoop = loop.getParentLoop(); + while (splitParentLoop != 0 && + !splitParentLoop->contains(&outBlock)) { + splitParentLoop = splitParentLoop->getParentLoop(); + } + + if (splitParentLoop != 0) { + assert(splitParentLoop->contains(&loop) && + "Split-block parent doesn't contain original loop?"); + splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); + + // Invalidate all parent loop ranges. + while (splitParentLoop != 0) { + loopRangeMap.erase(splitParentLoop); + splitParentLoop = splitParentLoop->getParentLoop(); + } + } + + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && + lis->isLiveInToMBB(li, &outBlock); + if (lis->isLiveInToMBB(li, splitBlock)) { + if (!intersects) { + li.removeRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), true); + } + } else if (intersects) { + VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock), + 0, false, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), + newVal)); + } + } + + //dbgs() << "done. 
(Added MBB#" << splitBlock->getNumber() << ")\n"; + + return *splitBlock; + } + + LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { + typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet; + LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); + if (lrItr == loopRangeMap.end()) { + LoopMBBSet loopMBBs((StartSlotComparator(*lis))); + std::copy(loop.block_begin(), loop.block_end(), + std::inserter(loopMBBs, loopMBBs.begin())); + + assert(!loopMBBs.empty() && "No blocks in loop?"); + + LoopRanges &loopRanges = loopRangeMap[&loop]; + assert(loopRanges.empty() && "Loop encountered but not processed?"); + SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); + loopRanges.push_back( + std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), + lis->getInvalidIndex())); + for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), + curBlockEnd = loopMBBs.end(); + curBlockItr != curBlockEnd; ++curBlockItr) { + SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); + if (newStart != oldEnd) { + loopRanges.back().second = oldEnd; + loopRanges.push_back(std::make_pair(newStart, + lis->getInvalidIndex())); + } + oldEnd = lis->getMBBEndIdx(*curBlockItr); + } + + loopRanges.back().second = + lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); + + return loopRanges; + } + return lrItr->second; + } + + std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange( + const LiveRange &lr, + MachineLoop &loop) { + LoopRanges &loopRanges = getLoopRanges(loop); + LoopRanges::iterator lrItr = loopRanges.begin(), + lrEnd = loopRanges.end(); + while (lrItr != lrEnd && lr.start >= lrItr->second) { + ++lrItr; + } + + if (lrItr == lrEnd) { + SlotIndex invalid = lis->getInvalidIndex(); + return std::make_pair(false, SlotPair(invalid, invalid)); + } + + SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); + SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end); + + return std::make_pair(true, SlotPair(srStart, srEnd)); + } + + void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { + LoopRanges &loopRanges = getLoopRanges(loop); + dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; + for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); + lrItr != lrEnd; ++lrItr) { + dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::processHeader(LoopSplit &split) { + MachineBasicBlock &header = *split.getLoop().getHeader(); + //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; + + if (!lis->isLiveInToMBB(split.getLI(), &header)) + return; // Not live in, but nothing wrong so far. + + MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); + if (!preHeader) { + + if (!canInsertPreHeader(split.getLoop())) { + split.invalidate(); + return; // Couldn't insert a pre-header. Bail on this interval. 
+ } + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { + split.splitIncoming(); + break; + } + } + } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { + split.splitIncoming(); + } + } + + void LoopSplitter::processLoopExits(LoopSplit &split) { + typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; + ExitEdgesList exitEdges; + split.getLoop().getExitEdges(exitEdges); + + //dbgs() << " Processing loop exits:\n"; + + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge exitEdge = *exitEdgeItr; + + LiveRange *outRange = + split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); + + if (outRange != 0) { + if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { + split.invalidate(); + return; + } + + split.splitOutgoing(exitEdge); + } + } + } + + void LoopSplitter::processLoopUses(LoopSplit &split) { + std::set<MachineInstr*> processed; + + for (MachineRegisterInfo::reg_iterator + rItr = mri->reg_begin(split.getLI().reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + MachineInstr &instr = *rItr; + if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { + split.addLoopInstr(&instr); + processed.insert(&instr); + } + } + + //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg + // << " in blocks [ "; + //dbgs() << "]\n"; + } + + bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { + assert(TargetRegisterInfo::isVirtualRegister(li.reg) && + "Attempt to split physical register."); + + LoopSplit split(*this, li, loop); + processHeader(split); + if (split.isValid()) + processLoopExits(split); + if (split.isValid()) + processLoopUses(split); + if (split.isValid() /* && split.isWorthwhile() */) { + split.apply(); + DEBUG(dbgs() << "Success.\n"); + return true; + } + DEBUG(dbgs() << "Failed.\n"); + return false; + } + + void LoopSplitter::processInterval(LiveInterval &li) { + std::deque<MachineLoop*> loops; + std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); + + while (!loops.empty()) { + MachineLoop &loop = *loops.front(); + loops.pop_front(); + DEBUG( + dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" + << loop.getHeader()->getNumber() << " "; + ); + if (!splitOverLoop(li, loop)) { + // Couldn't split over outer loop, schedule sub-loops to be checked. + std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + } + } + } + + void LoopSplitter::processIntervals() { + while (!intervals.empty()) { + LiveInterval &li = *intervals.front(); + intervals.pop_front(); + + assert(!lis->intervalIsInOneMBB(li) && + "Single interval in process worklist."); + + processInterval(li); + } + } + +} diff --git a/contrib/llvm/lib/CodeGen/Splitter.h b/contrib/llvm/lib/CodeGen/Splitter.h new file mode 100644 index 0000000..a726a7b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Splitter.h @@ -0,0 +1,99 @@ +//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
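For orientation, the LoopSplitter declared in this header is an ordinary machine function pass; a hypothetical way to schedule it (no such hook is added by this patch, and the pass pulls in the analyses listed in its getAnalysisUsage):

    // Hypothetical registration sketch only.
    #include "Splitter.h"
    #include "llvm/PassManager.h"
    void scheduleLoopSplitting(llvm::PassManagerBase &PM) {
      PM.add(new llvm::LoopSplitter());
    }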
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPLITTER_H +#define LLVM_CODEGEN_SPLITTER_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" + +#include <deque> +#include <map> +#include <string> +#include <vector> + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + struct LiveRange; + class LoopSplit; + class MachineDominatorTree; + class MachineRegisterInfo; + class SlotIndexes; + class TargetInstrInfo; + class VNInfo; + + class LoopSplitter : public MachineFunctionPass { + friend class LoopSplit; + public: + static char ID; + + LoopSplitter() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + + private: + + MachineFunction *mf; + LiveIntervals *lis; + MachineLoopInfo *mli; + MachineRegisterInfo *mri; + MachineDominatorTree *mdt; + SlotIndexes *sis; + const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; + + std::string fqn; + std::deque<LiveInterval*> intervals; + + typedef std::pair<SlotIndex, SlotIndex> SlotPair; + typedef std::vector<SlotPair> LoopRanges; + typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap; + LoopRangeMap loopRangeMap; + + void dumpLoopInfo(MachineLoop &loop); + + void dumpOddTerminators(); + + void updateTerminators(MachineBasicBlock &mbb); + + bool canInsertPreHeader(MachineLoop &loop); + MachineBasicBlock& insertPreHeader(MachineLoop &loop); + + bool isCriticalEdge(MachineLoop::Edge &edge); + bool canSplitEdge(MachineLoop::Edge &edge); + MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); + + LoopRanges& getLoopRanges(MachineLoop &loop); + std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr, + MachineLoop &loop); + + void dumpLoopRanges(MachineLoop &loop); + + void processHeader(LoopSplit &split); + void processLoopExits(LoopSplit &split); + void processLoopUses(LoopSplit &split); + + bool splitOverLoop(LiveInterval &li, MachineLoop &loop); + + void processInterval(LiveInterval &li); + + void processIntervals(); + }; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp new file mode 100644 index 0000000..9f51778 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -0,0 +1,240 @@ +//===-- StackProtector.cpp - Stack Protector Insertion --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass inserts stack protectors into functions which need them. A variable +// with a random value in it is stored onto the stack before the local variables +// are allocated. Upon exiting the block, the stored value is checked. If it's +// changed, then there was some sort of violation and the program aborts. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Attributes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +// SSPBufferSize - The lower bound for a buffer to be considered for stack +// smashing protection. +static cl::opt<unsigned> +SSPBufferSize("stack-protector-buffer-size", cl::init(8), + cl::desc("Lower bound for a buffer to be considered for " + "stack protection")); + +namespace { + class StackProtector : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. + const TargetLowering *TLI; + + Function *F; + Module *M; + + /// InsertStackProtectors - Insert code into the prologue and epilogue of + /// the function. + /// + /// - The prologue code loads and stores the stack guard onto the stack. + /// - The epilogue checks the value stored in the prologue against the + /// original value. It calls __stack_chk_fail if they differ. + bool InsertStackProtectors(); + + /// CreateFailBB - Create a basic block to jump to when the stack protector + /// check fails. + BasicBlock *CreateFailBB(); + + /// RequiresStackProtector - Check whether or not this function needs a + /// stack protector based upon the stack protector level. + bool RequiresStackProtector() const; + public: + static char ID; // Pass identification, replacement for typeid. + StackProtector() : FunctionPass(ID), TLI(0) {} + StackProtector(const TargetLowering *tli) + : FunctionPass(ID), TLI(tli) {} + + virtual bool runOnFunction(Function &Fn); + }; +} // end anonymous namespace + +char StackProtector::ID = 0; +INITIALIZE_PASS(StackProtector, "stack-protector", + "Insert stack protectors", false, false); + +FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { + return new StackProtector(tli); +} + +bool StackProtector::runOnFunction(Function &Fn) { + F = &Fn; + M = F->getParent(); + + if (!RequiresStackProtector()) return false; + + return InsertStackProtectors(); +} + +/// RequiresStackProtector - Check whether or not this function needs a stack +/// protector based upon the stack protector level. The heuristic we use is to +/// add a guard variable to functions that call alloca, and functions with +/// buffers larger than SSPBufferSize bytes. +bool StackProtector::RequiresStackProtector() const { + if (F->hasFnAttr(Attribute::StackProtectReq)) + return true; + + if (!F->hasFnAttr(Attribute::StackProtect)) + return false; + + const TargetData *TD = TLI->getTargetData(); + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + BasicBlock *BB = I; + + for (BasicBlock::iterator + II = BB->begin(), IE = BB->end(); II != IE; ++II) + if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (AI->isArrayAllocation()) + // This is a call to alloca with a variable size. Emit stack + // protectors. + return true; + + if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { + // We apparently only care about character arrays. + if (!AT->getElementType()->isIntegerTy(8)) + continue; + + // If an array has more than SSPBufferSize bytes of allocated space, + // then we emit stack protectors. 
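          // Concrete example (illustrative, not from the patch): with the
          // default -stack-protector-buffer-size=8, an i8 array "char buf[8]"
          // has alloc size 8 and passes the test below, so the function gets
          // a guard; "char buf[7]" does not. Non-i8 arrays were already
          // skipped above.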
+ if (SSPBufferSize <= TD->getTypeAllocSize(AT)) + return true; + } + } + } + + return false; +} + +/// InsertStackProtectors - Insert code into the prologue and epilogue of the +/// function. +/// +/// - The prologue code loads and stores the stack guard onto the stack. +/// - The epilogue checks the value stored in the prologue against the original +/// value. It calls __stack_chk_fail if they differ. +bool StackProtector::InsertStackProtectors() { + BasicBlock *FailBB = 0; // The basic block to jump to if check fails. + AllocaInst *AI = 0; // Place on stack that stores the stack guard. + Value *StackGuardVar = 0; // The stack guard variable. + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + BasicBlock *BB = I++; + + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) continue; + + if (!FailBB) { + // Insert code into the entry block that stores the __stack_chk_guard + // variable onto the stack: + // + // entry: + // StackGuardSlot = alloca i8* + // StackGuard = load __stack_chk_guard + // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) + // + const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, + PointerType::get(PtrTy, AddressSpace)); + } else { + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + BasicBlock &Entry = F->getEntryBlock(); + Instruction *InsPt = &Entry.front(); + + AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); + LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + + Value *Args[] = { LI, AI }; + CallInst:: + Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + &Args[0], array_endof(Args), "", InsPt); + + // Create the basic block to jump to when the guard check fails. + FailBB = CreateFailBB(); + } + + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); + + // Move the newly created basic block to the point right after the old basic + // block so that it's in the "fall through" position. + NewBB->moveAfter(BB); + + // Generate the stack protector instructions in the old basic block. + LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); + LoadInst *LI2 = new LoadInst(AI, "", true, BB); + ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); + BranchInst::Create(NewBB, FailBB, Cmp, BB); + } + + // Return if we didn't modify any basic blocks. I.e., there are no return + // statements in the function. + if (!FailBB) return false; + + return true; +} + +/// CreateFailBB - Create a basic block to jump to when the stack protector +/// check fails. 
+BasicBlock *StackProtector::CreateFailBB() {
+  BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+                                          "CallStackCheckFailBlk", F);
+  Constant *StackChkFail =
+    M->getOrInsertFunction("__stack_chk_fail",
+                           Type::getVoidTy(F->getContext()), NULL);
+  CallInst::Create(StackChkFail, "", FailBB);
+  new UnreachableInst(F->getContext(), FailBB);
+  return FailBB;
+}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 0000000..8d57ae9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,758 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+               cl::init(false), cl::Hidden,
+               cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+                 cl::init(false), cl::Hidden,
+                 cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+  class StackSlotColoring : public MachineFunctionPass {
+    bool ColorWithRegs;
+    LiveStacks* LS;
+    VirtRegMap* VRM;
+    MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    const MachineLoopInfo *loopInfo;
+
+    // SSIntervals - Spill slot intervals.
+    std::vector<LiveInterval*> SSIntervals;
+
+    // SSRefs - Keep a list of frame index references for each spill slot.
+    SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+    // OrigAlignments - Alignments of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigSizes;
+
+    // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to stack pointer / frame pointer.
Therefore, smaller + // index == better color. + BitVector AllColors; + + // NextColor - Next "color" that's not yet used. + int NextColor; + + // UsedColors - "Colors" that have been assigned. + BitVector UsedColors; + + // Assignments - Color to intervals mapping. + SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments; + + public: + static char ID; // Pass identification + StackSlotColoring() : + MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {} + StackSlotColoring(bool RegColor) : + MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveStacks>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char* getPassName() const { + return "Stack Slot Coloring"; + } + + private: + void InitializeSlots(); + void ScanForSpillSlotRefs(MachineFunction &MF); + bool OverlapWithAssignments(LiveInterval *li, int Color) const; + int ColorSlot(LiveInterval *li); + bool ColorSlots(MachineFunction &MF); + bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, + SmallVector<SmallVector<int, 4>, 16> &RevMap, + BitVector &SlotIsReg); + void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, + MachineFunction &MF); + bool PropagateBackward(MachineBasicBlock::iterator MII, + MachineBasicBlock *MBB, + unsigned OldReg, unsigned NewReg); + bool PropagateForward(MachineBasicBlock::iterator MII, + MachineBasicBlock *MBB, + unsigned OldReg, unsigned NewReg); + void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, + unsigned Reg, const TargetRegisterClass *RC, + SmallSet<unsigned, 4> &Defs, + MachineFunction &MF); + bool AllMemRefsCanBeUnfolded(int SS); + bool RemoveDeadStores(MachineBasicBlock* MBB); + }; +} // end anonymous namespace + +char StackSlotColoring::ID = 0; + +INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false); + +FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { + return new StackSlotColoring(RegColor); +} + +namespace { + // IntervalSorter - Comparison predicate that sort live intervals by + // their weight. + struct IntervalSorter { + bool operator()(LiveInterval* LHS, LiveInterval* RHS) const { + return LHS->weight > RHS->weight; + } + }; +} + +/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot +/// references and update spill slot weights. +void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { + SSRefs.resize(MFI->getObjectIndexEnd()); + + // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands. 
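  // Explanatory note: getSpillWeight scales each reference by loop depth
  // (two references at depth 1 come to roughly 20, the cutoff that
  // ColorSlotsWithFreeRegs later uses to skip cold slots), so heavily
  // referenced slots in deep loops sort first for coloring.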
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + unsigned loopDepth = loopInfo->getLoopDepth(MBB); + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int FI = MO.getIndex(); + if (FI < 0) + continue; + if (!LS->hasInterval(FI)) + continue; + LiveInterval &li = LS->getInterval(FI); + if (!MI->isDebugValue()) + li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); + SSRefs[FI].push_back(MI); + } + } + } +} + +/// InitializeSlots - Process all spill stack slot liveintervals and add them +/// to a sorted (by weight) list. +void StackSlotColoring::InitializeSlots() { + int LastFI = MFI->getObjectIndexEnd(); + OrigAlignments.resize(LastFI); + OrigSizes.resize(LastFI); + AllColors.resize(LastFI); + UsedColors.resize(LastFI); + Assignments.resize(LastFI); + + // Gather all spill slots into a list. + DEBUG(dbgs() << "Spill slot intervals:\n"); + for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { + LiveInterval &li = i->second; + DEBUG(li.dump()); + int FI = li.getStackSlotIndex(); + if (MFI->isDeadObjectIndex(FI)) + continue; + SSIntervals.push_back(&li); + OrigAlignments[FI] = MFI->getObjectAlignment(FI); + OrigSizes[FI] = MFI->getObjectSize(FI); + AllColors.set(FI); + } + DEBUG(dbgs() << '\n'); + + // Sort them by weight. + std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + + // Get first "color". + NextColor = AllColors.find_first(); +} + +/// OverlapWithAssignments - Return true if LiveInterval overlaps with any +/// LiveIntervals that have already been assigned to the specified color. +bool +StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { + const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color]; + for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { + LiveInterval *OtherLI = OtherLIs[i]; + if (OtherLI->overlaps(*li)) + return true; + } + return false; +} + +/// ColorSlotsWithFreeRegs - If there are any free registers available, try +/// replacing spill slots references with registers instead. +bool +StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, + SmallVector<SmallVector<int, 4>, 16> &RevMap, + BitVector &SlotIsReg) { + if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) + return false; + + bool Changed = false; + DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { + LiveInterval *li = SSIntervals[i]; + int SS = li->getStackSlotIndex(); + if (!UsedColors[SS] || li->weight < 20) + // If the weight is < 20, i.e. two references in a loop with depth 1, + // don't bother with it. + continue; + + // These slots allow to share the same registers. + bool AllColored = true; + SmallVector<unsigned, 4> ColoredRegs; + for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { + int RSS = RevMap[SS][j]; + const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); + // If it's not colored to another stack slot, try coloring it + // to a "free" register. 
+      if (!RC) {
+        AllColored = false;
+        continue;
+      }
+      unsigned Reg = VRM->getFirstUnusedRegister(RC);
+      if (!Reg) {
+        AllColored = false;
+        continue;
+      }
+      if (!AllMemRefsCanBeUnfolded(RSS)) {
+        AllColored = false;
+        continue;
+      } else {
+        DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
+                     << TRI->getName(Reg) << '\n');
+        ColoredRegs.push_back(Reg);
+        SlotMapping[RSS] = Reg;
+        SlotIsReg.set(RSS);
+        Changed = true;
+      }
+    }
+
+    // The register and its aliases are no longer free.
+    while (!ColoredRegs.empty()) {
+      unsigned Reg = ColoredRegs.back();
+      ColoredRegs.pop_back();
+      VRM->setRegisterUsed(Reg);
+      // If reg is a callee-saved register, it will have to be spilled in
+      // the prologue.
+      MRI->setPhysRegUsed(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+        VRM->setRegisterUsed(*AS);
+        MRI->setPhysRegUsed(*AS);
+      }
+    }
+    // This spill slot is dead after the rewrites.
+    if (AllColored) {
+      MFI->RemoveStackObject(SS);
+      ++NumEliminated;
+    }
+  }
+  DEBUG(dbgs() << '\n');
+
+  return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+  int Color = -1;
+  bool Share = false;
+  if (!DisableSharing) {
+    // Check if it's possible to reuse any of the used colors.
+    Color = UsedColors.find_first();
+    while (Color != -1) {
+      if (!OverlapWithAssignments(li, Color)) {
+        Share = true;
+        ++NumEliminated;
+        break;
+      }
+      Color = UsedColors.find_next(Color);
+    }
+  }
+
+  // Assign it to the first available color (assumed to be the best) if it's
+  // not possible to share a used color with other objects.
+  if (!Share) {
+    assert(NextColor != -1 && "No more spill slots?");
+    Color = NextColor;
+    UsedColors.set(Color);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  // Record the assignment.
+  Assignments[Color].push_back(li);
+  int FI = li->getStackSlotIndex();
+  DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+  // Change size and alignment of the allocated slot. If there are multiple
+  // objects sharing the same slot, then make sure the size and alignment
+  // are large enough for all.
+  unsigned Align = OrigAlignments[FI];
+  if (!Share || Align > MFI->getObjectAlignment(Color))
+    MFI->setObjectAlignment(Color, Align);
+  int64_t Size = OrigSizes[FI];
+  if (!Share || Size > MFI->getObjectSize(Color))
+    MFI->setObjectSize(Color, Size);
+  return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
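+/// For example (illustrative frame indexes): if fi#1 and fi#4 have
+/// non-overlapping live intervals, both can be assigned the color fi#1; the
+/// shared slot is grown to the larger of the two sizes and alignments, and
+/// every reference to fi#4 is rewritten to fi#1.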
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+  unsigned NumObjs = MFI->getObjectIndexEnd();
+  SmallVector<int, 16> SlotMapping(NumObjs, -1);
+  SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+  SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+  BitVector SlotIsReg(NumObjs);
+  BitVector UsedColors(NumObjs);
+
+  DEBUG(dbgs() << "Color spill slot intervals:\n");
+  bool Changed = false;
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    int NewSS = ColorSlot(li);
+    assert(NewSS >= 0 && "Stack coloring failed?");
+    SlotMapping[SS] = NewSS;
+    RevMap[NewSS].push_back(SS);
+    SlotWeights[NewSS] += li->weight;
+    UsedColors.set(NewSS);
+    Changed |= (SS != NewSS);
+  }
+
+  DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    li->weight = SlotWeights[SS];
+  }
+  // Sort them by new weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+    DEBUG(SSIntervals[i]->dump());
+  DEBUG(dbgs() << '\n');
+#endif
+
+  // Can we "color" a stack slot with an unused register?
+  Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+  if (!Changed)
+    return false;
+
+  // Rewrite all MO_FrameIndex operands.
+  SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+  for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+    bool isReg = SlotIsReg[SS];
+    int NewFI = SlotMapping[SS];
+    if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+      continue;
+
+    const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+    SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+    for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+      if (!isReg)
+        RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+      else {
+        // Rewrite to use a register instead.
+        unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+        SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+        UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+      }
+  }
+
+  // Delete unused stack slots.
+  while (NextColor != -1) {
+    DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+    MFI->RemoveStackObject(NextColor);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+  SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+  for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+    MachineInstr *MI = RefMIs[i];
+    if (TII->isLoadFromStackSlot(MI, SS) ||
+        TII->isStoreToStackSlot(MI, SS))
+      // Restore and spill will become copies.
+      return true;
+    if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+      return false;
+    for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+      MachineOperand &MO = MI->getOperand(j);
+      if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frameindex, we can't currently unfold it.
+        return false;
+    }
+  }
+  return true;
+}
+
+/// RewriteInstruction - Rewrite the specified instruction by replacing
+/// references to the old frame index with the new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+                                           int NewFI, MachineFunction &MF) {
+  // Update the operands.
+  for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isFI())
+      continue;
+    int FI = MO.getIndex();
+    if (FI != OldFI)
+      continue;
+    MO.setIndex(NewFI);
+  }
+
+  // Update the memory references. This changes the MachineMemOperands
+  // directly. They may be in use by multiple instructions; however, all
+  // instructions using OldFI are being rewritten to use NewFI.
+  const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+  const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+       E = MI->memoperands_end(); I != E; ++I)
+    if ((*I)->getValue() == OldSV)
+      (*I)->setValue(NewSV);
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+                                          MachineBasicBlock *MBB,
+                                          unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->begin())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  SmallVector<MachineOperand*, 4> Refs;
+  while (--MII != MBB->begin()) {
+    bool FoundDef = false;  // Not counting 2address def.
+
+    Uses.clear();
+    const TargetInstrDesc &TID = MII->getDesc();
+    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MII->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0)
+        continue;
+      if (Reg == OldReg) {
+        if (MO.isImplicit())
+          return false;
+
+        // Abort if the use is actually a sub-register def. We don't have
+        // enough information to figure out if it is really legal.
+        if (MO.getSubReg() || MII->isSubregToReg())
+          return false;
+
+        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        if (RC && !RC->contains(NewReg))
+          return false;
+
+        if (MO.isUse()) {
+          Uses.push_back(&MO);
+        } else {
+          Refs.push_back(&MO);
+          if (!MII->isRegTiedToUseOperand(i))
+            FoundDef = true;
+        }
+      } else if (TRI->regsOverlap(Reg, NewReg)) {
+        return false;
+      } else if (TRI->regsOverlap(Reg, OldReg)) {
+        if (!MO.isUse() || !MO.isKill())
+          return false;
+      }
+    }
+
+    if (FoundDef) {
+      // Found non-two-address def. Stop here.
+      for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+        Refs[i]->setReg(NewReg);
+      return true;
+    }
+
+    // Two-address uses must be updated as well.
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+      Refs.push_back(Uses[i]);
+  }
+  return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+                                         MachineBasicBlock *MBB,
+                                         unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->end())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  while (++MII != MBB->end()) {
+    bool FoundKill = false;
+    const TargetInstrDesc &TID = MII->getDesc();
+    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MII->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0)
+        continue;
+      if (Reg == OldReg) {
+        if (MO.isDef() || MO.isImplicit())
+          return false;
+
+        // Abort if the use is actually a sub-register use. We don't have
+        // enough information to figure out if it is really legal.
+ if (MO.getSubReg()) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); + if (RC && !RC->contains(NewReg)) + return false; + if (MO.isKill()) + FoundKill = true; + + Uses.push_back(&MO); + } else if (TRI->regsOverlap(Reg, NewReg) || + TRI->regsOverlap(Reg, OldReg)) + return false; + } + if (FoundKill) { + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + Uses[i]->setReg(NewReg); + return true; + } + } + return false; +} + +/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding +/// folded memory references and replacing those references with register +/// references instead. +void +StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, + unsigned Reg, + const TargetRegisterClass *RC, + SmallSet<unsigned, 4> &Defs, + MachineFunction &MF) { + MachineBasicBlock *MBB = MI->getParent(); + if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { + if (PropagateForward(MI, MBB, DstReg, Reg)) { + DEBUG(dbgs() << "Eliminated load: "); + DEBUG(MI->dump()); + ++NumLoadElim; + } else { + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + DstReg).addReg(Reg); + ++NumRegRepl; + } + + if (!Defs.count(Reg)) { + // If this is the first use of Reg in this MBB and it wasn't previously + // defined in MBB, add it to livein. + MBB->addLiveIn(Reg); + Defs.insert(Reg); + } + } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { + if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { + DEBUG(dbgs() << "Eliminated store: "); + DEBUG(MI->dump()); + ++NumStoreElim; + } else { + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(SrcReg); + ++NumRegRepl; + } + + // Remember reg has been defined in MBB. + Defs.insert(Reg); + } else { + SmallVector<MachineInstr*, 4> NewMIs; + bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); + Success = Success; // Silence compiler warning. + assert(Success && "Failed to unfold!"); + MachineInstr *NewMI = NewMIs[0]; + MBB->insert(MI, NewMI); + ++NumRegRepl; + + if (NewMI->readsRegister(Reg)) { + if (!Defs.count(Reg)) + // If this is the first use of Reg in this MBB and it wasn't previously + // defined in MBB, add it to livein. + MBB->addLiveIn(Reg); + Defs.insert(Reg); + } + } + MBB->erase(MI); +} + +/// RemoveDeadStores - Scan through a basic block and look for loads followed +/// by stores. If they're both using the same stack slot, then the store is +/// definitely dead. This could obviously be much more aggressive (consider +/// pairs with instructions between them), but such extensions might have a +/// considerable compile time impact. +bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { + // FIXME: This could be much more aggressive, but we need to investigate + // the compile time impact of doing so. 
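+  // The pattern recognized below is a reload immediately followed by a spill
+  // of the same register to the same slot, e.g. (illustrative opcodes):
+  //   %r1 = LOAD <fi#2>
+  //   STORE %r1, <fi#2>
+  // The store is always dead; the load is removed as well when the store was
+  // its last (killing) use.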
+  bool changed = false;
+
+  SmallVector<MachineInstr*, 4> toErase;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    if (DCELimit != -1 && (int)NumDead >= DCELimit)
+      break;
+
+    MachineBasicBlock::iterator NextMI = llvm::next(I);
+    if (NextMI == MBB->end()) continue;
+
+    int FirstSS, SecondSS;
+    unsigned LoadReg = 0;
+    unsigned StoreReg = 0;
+    if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+    if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+    ++NumDead;
+    changed = true;
+
+    if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+      ++NumDead;
+      toErase.push_back(I);
+    }
+
+    toErase.push_back(NextMI);
+    ++I;
+  }
+
+  for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+       E = toErase.end(); I != E; ++I)
+    (*I)->eraseFromParent();
+
+  return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+      dbgs() << "********** Stack Slot Coloring **********\n"
+             << "********** Function: "
+             << MF.getFunction()->getName() << '\n';
+    });
+
+  MFI = MF.getFrameInfo();
+  MRI = &MF.getRegInfo();
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  LS = &getAnalysis<LiveStacks>();
+  VRM = &getAnalysis<VirtRegMap>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  bool Changed = false;
+
+  unsigned NumSlots = LS->getNumIntervals();
+  if (NumSlots < 2) {
+    if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+      // Nothing to do!
+      return false;
+  }
+
+  // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+  // coloring. The stack could be modified before the longjmp is executed,
+  // resulting in the wrong value being used afterwards. (See
+  // <rdar://problem/8007500>.)
+  if (MF.callsSetJmp())
+    return false;
+
+  // Gather spill slot references
+  ScanForSpillSlotRefs(MF);
+  InitializeSlots();
+  Changed = ColorSlots(MF);
+
+  NextColor = -1;
+  SSIntervals.clear();
+  for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+    SSRefs[i].clear();
+  SSRefs.clear();
+  OrigAlignments.clear();
+  OrigSizes.clear();
+  AllColors.clear();
+  UsedColors.clear();
+  for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+    Assignments[i].clear();
+  Assignments.clear();
+
+  if (Changed) {
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+      Changed |= RemoveDeadStores(I);
+  }
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 0000000..894dbfa
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,1045 @@
+//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions, using an intelligent copy-folding technique based on
+// dominator information. This technique is derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+  struct StrongPHIElimination : public MachineFunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    StrongPHIElimination() : MachineFunctionPass(ID) {}
+
+    // Waiting stores, for each MBB, the set of copies that need to
+    // be inserted into that MBB.
+    DenseMap<MachineBasicBlock*,
+             std::multimap<unsigned, unsigned> > Waiting;
+
+    // Stacks holds the renaming stack for each register
+    std::map<unsigned, std::vector<unsigned> > Stacks;
+
+    // Registers in UsedByAnother are PHI nodes that are themselves
+    // used as operands to another PHI node
+    std::set<unsigned> UsedByAnother;
+
+    // RenameSets is a map from a PHI-defined register
+    // to the input registers to be coalesced, along with the
+    // predecessor block for those input registers.
+    std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
+
+    // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
+    // eliminating, indexed by the register defined by that phi.
+    std::map<unsigned, unsigned> PhiValueNumber;
+
+    // Store the DFS-in number of each block
+    DenseMap<MachineBasicBlock*, unsigned> preorder;
+
+    // Store the DFS-out number of each block
+    DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<SlotIndexes>();
+      AU.addPreserved<SlotIndexes>();
+      AU.addRequired<LiveIntervals>();
+
+      // TODO: Actually make this true.
+      AU.addPreserved<LiveIntervals>();
+      AU.addPreserved<RegisterCoalescer>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      preorder.clear();
+      maxpreorder.clear();
+
+      Waiting.clear();
+      Stacks.clear();
+      UsedByAnother.clear();
+      RenameSets.clear();
+    }
+
+  private:
+
+    /// DomForestNode - Represents a node in the "dominator forest". This is
+    /// a forest in which the nodes represent registers and the edges
+    /// represent a dominance relation between the blocks defining those
+    /// registers.
+    struct DomForestNode {
+    private:
+      // Store references to our children
+      std::vector<DomForestNode*> children;
+      // The register we represent
+      unsigned reg;
+
+      // Add another node as our child
+      void addChild(DomForestNode* DFN) { children.push_back(DFN); }
+
+    public:
+      typedef std::vector<DomForestNode*>::iterator iterator;
+
+      // Create a DomForestNode by providing the register it represents, and
+      // the node to be its parent. The virtual root node has register 0
+      // and a null parent.
+      DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
+        if (parent)
+          parent->addChild(this);
+      }
+
+      ~DomForestNode() {
+        for (iterator I = begin(), E = end(); I != E; ++I)
+          delete *I;
+      }
+
+      /// getReg - Return the register that this node represents
+      inline unsigned getReg() { return reg; }
+
+      // Provide iterator access to our children
+      inline DomForestNode::iterator begin() { return children.begin(); }
+      inline DomForestNode::iterator end() { return children.end(); }
+    };
+
+    void computeDFS(MachineFunction& MF);
+    void processBlock(MachineBasicBlock* MBB);
+
+    std::vector<DomForestNode*> computeDomForest(
+                                 std::map<unsigned, MachineBasicBlock*>& instrs,
+                                 MachineRegisterInfo& MRI);
+    void processPHIUnion(MachineInstr* Inst,
+                         std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+                         std::vector<StrongPHIElimination::DomForestNode*>& DF,
+                         std::vector<std::pair<unsigned, unsigned> >& locals);
+    void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
+    void InsertCopies(MachineDomTreeNode* MBB,
+                      SmallPtrSet<MachineBasicBlock*, 16>& v);
+    bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+  };
+}
+
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently", false, false);
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
+/// of the given MachineFunction. These numbers are then used in other parts
+/// of the PHI elimination process.
+void StrongPHIElimination::computeDFS(MachineFunction& MF) {
+  SmallPtrSet<MachineDomTreeNode*, 8> frontier;
+  SmallPtrSet<MachineDomTreeNode*, 8> visited;
+
+  unsigned time = 0;
+
+  MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
+
+  MachineDomTreeNode* node = DT.getRootNode();
+
+  std::vector<MachineDomTreeNode*> worklist;
+  worklist.push_back(node);
+
+  while (!worklist.empty()) {
+    MachineDomTreeNode* currNode = worklist.back();
+
+    if (!frontier.count(currNode)) {
+      frontier.insert(currNode);
+      ++time;
+      preorder.insert(std::make_pair(currNode->getBlock(), time));
+    }
+
+    bool inserted = false;
+    for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
+         I != E; ++I)
+      if (!frontier.count(*I) && !visited.count(*I)) {
+        worklist.push_back(*I);
+        inserted = true;
+        break;
+      }
+
+    if (!inserted) {
+      frontier.erase(currNode);
+      visited.insert(currNode);
+      maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
+
+      worklist.pop_back();
+    }
+  }
+}
+
+namespace {
+
+/// PreorderSorter - a helper class that is used to sort registers
+/// according to the preorder number of their defining blocks
+class PreorderSorter {
+private:
+  DenseMap<MachineBasicBlock*, unsigned>& preorder;
+  MachineRegisterInfo& MRI;
+
+public:
+  PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
+                 MachineRegisterInfo& M) : preorder(p), MRI(M) { }
+
+  bool operator()(unsigned A, unsigned B) {
+    if (A == B)
+      return false;
+
+    MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
+    MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
+
+    return preorder[ABlock] < preorder[BBlock];
+  }
+};
+
+}
+
+/// computeDomForest - compute the subforest of the DomTree corresponding
+/// to the defining blocks of the registers in question
+std::vector<StrongPHIElimination::DomForestNode*>
+StrongPHIElimination::computeDomForest(
+                                    std::map<unsigned, MachineBasicBlock*>& regs,
+                                    MachineRegisterInfo& MRI) {
+  // Begin by creating a virtual root node, since the actual results
+  // may well be a forest. Assume this node has maximum DFS-out number.
+  DomForestNode* VirtualRoot = new DomForestNode(0, 0);
+  maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
+
+  // Populate a worklist with the registers
+  std::vector<unsigned> worklist;
+  worklist.reserve(regs.size());
+  for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
+       E = regs.end(); I != E; ++I)
+    worklist.push_back(I->first);
+
+  // Sort the registers by the DFS-in number of their defining block
+  PreorderSorter PS(preorder, MRI);
+  std::sort(worklist.begin(), worklist.end(), PS);
+
+  // Create a "current parent" stack, and put the virtual root on top of it
+  DomForestNode* CurrentParent = VirtualRoot;
+  std::vector<DomForestNode*> stack;
+  stack.push_back(VirtualRoot);
+
+  // Iterate over all the registers in the previously computed order
+  for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
+       I != E; ++I) {
+    unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
+    MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
+                 MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+                 0;
+
+    // If the DFS-in number of the register is greater than the DFS-out number
+    // of the current parent, repeatedly pop the parent stack until it isn't.
+    while (pre > maxpreorder[parentBlock]) {
+      stack.pop_back();
+      CurrentParent = stack.back();
+
+      parentBlock = CurrentParent->getReg() ?
+                 MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+                 0;
+    }
+
+    // Now that we've found the appropriate parent, create a DomForestNode for
+    // this register and attach it to the forest
+    DomForestNode* child = new DomForestNode(*I, CurrentParent);
+
+    // Push this new node on the "current parent" stack
+    stack.push_back(child);
+    CurrentParent = child;
+  }
+
+  // Return a vector containing the children of the virtual root node
+  std::vector<DomForestNode*> ret;
+  ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
+  return ret;
+}
+
+/// isLiveIn - helper method that determines, from a regno, if a register
+/// is live into a block
+static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
+                     LiveIntervals& LI) {
+  LiveInterval& I = LI.getOrCreateInterval(r);
+  SlotIndex idx = LI.getMBBStartIdx(MBB);
+  return I.liveAt(idx);
+}
+
+/// isLiveOut - helper method that determines, from a regno, if a register is
+/// live out of a block.
+static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
+                      LiveIntervals& LI) {
+  for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+       E = MBB->succ_end(); PI != E; ++PI)
+    if (isLiveIn(r, *PI, LI))
+      return true;
+
+  return false;
+}
+
+/// interferes - checks for local interferences by scanning a block. The only
+/// tricky parameter is 'mode', which tells it the relationship of the two
+/// registers: 0 - defined in the same block, 1 - first properly dominates
+/// second, 2 - second properly dominates first
+static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
+                       LiveIntervals& LV, unsigned mode) {
+  MachineInstr* def = 0;
+  MachineInstr* kill = 0;
+
+  // The code is still in SSA form at this point, so there is only one
+  // definition per VReg. Thus we can safely use MRI->getVRegDef().
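+  //
+  // A sketch of the mode 1 case (a's def properly dominates b's def): while
+  // scanning b's defining block, seeing a kill of 'a' before the def of 'b'
+  // proves the two ranges are disjoint here; reaching the def of 'b' without
+  // such a kill means 'a' is still live, so the two registers interfere.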
+  const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
+
+  bool interference = false;
+
+  // Walk the block, checking for interferences.
+  for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
+       MBI != MBE; ++MBI) {
+    MachineInstr* curr = MBI;
+
+    // Same defining block...
+    if (mode == 0) {
+      if (curr == MRI->getVRegDef(a)) {
+        // If we find our first definition, save it
+        if (!def) {
+          def = curr;
+        // If there's already an unkilled definition, then
+        // this is an interference
+        } else if (!kill) {
+          interference = true;
+          break;
+        // If there's a definition followed by a KillInst, then
+        // they can't interfere
+        } else {
+          interference = false;
+          break;
+        }
+      // Symmetric with the above
+      } else if (curr == MRI->getVRegDef(b)) {
+        if (!def) {
+          def = curr;
+        } else if (!kill) {
+          interference = true;
+          break;
+        } else {
+          interference = false;
+          break;
+        }
+      // Store KillInsts if they match up with the definition
+      } else if (curr->killsRegister(a)) {
+        if (def == MRI->getVRegDef(a)) {
+          kill = curr;
+        }
+      } else if (curr->killsRegister(b)) {
+        if (def == MRI->getVRegDef(b)) {
+          kill = curr;
+        }
+      }
+    // First properly dominates second...
+    } else if (mode == 1) {
+      if (curr == MRI->getVRegDef(b)) {
+        // Definition of second without kill of first is an interference
+        if (!kill) {
+          interference = true;
+          break;
+        // Definition after a kill is a non-interference
+        } else {
+          interference = false;
+          break;
+        }
+      // Save KillInsts of First
+      } else if (curr->killsRegister(a)) {
+        kill = curr;
+      }
+    // Symmetric with the above
+    } else if (mode == 2) {
+      if (curr == MRI->getVRegDef(a)) {
+        if (!kill) {
+          interference = true;
+          break;
+        } else {
+          interference = false;
+          break;
+        }
+      } else if (curr->killsRegister(b)) {
+        kill = curr;
+      }
+    }
+  }
+
+  return interference;
+}
+
+/// processBlock - Determine how to break up PHIs in the current block. Each
+/// PHI is broken up by some combination of renaming its operands and inserting
+/// copies. This method is responsible for determining which operands receive
+/// which treatment.
+void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+  MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+
+  // Holds names that have been added to a set in any PHI within this block
+  // before the current one.
+  std::set<unsigned> ProcessedNames;
+
+  // Iterate over all the PHI nodes in this block
+  MachineBasicBlock::iterator P = MBB->begin();
+  while (P != MBB->end() && P->isPHI()) {
+    unsigned DestReg = P->getOperand(0).getReg();
+
+    // Don't bother doing PHI elimination for dead PHIs.
+    if (P->registerDefIsDead(DestReg)) {
+      ++P;
+      continue;
+    }
+
+    LiveInterval& PI = LI.getOrCreateInterval(DestReg);
+    SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
+    VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
+    PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+
+    // PHIUnion is the set of incoming registers to the PHI node that
+    // are going to be renamed rather than having copies inserted. This set
+    // is refined over the course of this function. UnionedBlocks is the set
+    // of corresponding MBBs.
+    std::map<unsigned, MachineBasicBlock*> PHIUnion;
+    SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
+
+    // Iterate over the operands of the PHI node
+    for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+      unsigned SrcReg = P->getOperand(i-1).getReg();
+
+      // Don't need to try to coalesce a register with itself.
+ if (SrcReg == DestReg) { + ProcessedNames.insert(SrcReg); + continue; + } + + // We don't need to insert copies for implicit_defs. + MachineInstr* DefMI = MRI.getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) + ProcessedNames.insert(SrcReg); + + // Check for trivial interferences via liveness information, allowing us + // to avoid extra work later. Any registers that interfere cannot both + // be in the renaming set, so choose one and add copies for it instead. + // The conditions are: + // 1) if the operand is live into the PHI node's block OR + // 2) if the PHI node is live out of the operand's defining block OR + // 3) if the operand is itself a PHI node and the original PHI is + // live into the operand's defining block OR + // 4) if the operand is already being renamed for another PHI node + // in this block OR + // 5) if any two operands are defined in the same block, insert copies + // for one of them + if (isLiveIn(SrcReg, P->getParent(), LI) || + isLiveOut(P->getOperand(0).getReg(), + MRI.getVRegDef(SrcReg)->getParent(), LI) || + ( MRI.getVRegDef(SrcReg)->isPHI() && + isLiveIn(P->getOperand(0).getReg(), + MRI.getVRegDef(SrcReg)->getParent(), LI) ) || + ProcessedNames.count(SrcReg) || + UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) { + + // Add a copy for the selected register + MachineBasicBlock* From = P->getOperand(i).getMBB(); + Waiting[From].insert(std::make_pair(SrcReg, DestReg)); + UsedByAnother.insert(SrcReg); + } else { + // Otherwise, add it to the renaming set + PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB())); + UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent()); + } + } + + // Compute the dominator forest for the renaming set. This is a forest + // where the nodes are the registers and the edges represent dominance + // relations between the defining blocks of the registers + std::vector<StrongPHIElimination::DomForestNode*> DF = + computeDomForest(PHIUnion, MRI); + + // Walk DomForest to resolve interferences at an inter-block level. This + // will remove registers from the renaming set (and insert copies for them) + // if interferences are found. + std::vector<std::pair<unsigned, unsigned> > localInterferences; + processPHIUnion(P, PHIUnion, DF, localInterferences); + + // If one of the inputs is defined in the same block as the current PHI + // then we need to check for a local interference between that input and + // the PHI. + for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), + E = PHIUnion.end(); I != E; ++I) + if (MRI.getVRegDef(I->first)->getParent() == P->getParent()) + localInterferences.push_back(std::make_pair(I->first, + P->getOperand(0).getReg())); + + // The dominator forest walk may have returned some register pairs whose + // interference cannot be determined from dominator analysis. We now + // examine these pairs for local interferences. 
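+    //
+    // For instance, if two PHI inputs are defined in the same block, neither
+    // definition dominates the other, so the forest walk cannot order them;
+    // the shared block has to be scanned directly (mode 0 below).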
+ for (std::vector<std::pair<unsigned, unsigned> >::iterator I = + localInterferences.begin(), E = localInterferences.end(); I != E; ++I) { + std::pair<unsigned, unsigned> p = *I; + + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + + // Determine the block we need to scan and the relationship between + // the two registers + MachineBasicBlock* scan = 0; + unsigned mode = 0; + if (MRI.getVRegDef(p.first)->getParent() == + MRI.getVRegDef(p.second)->getParent()) { + scan = MRI.getVRegDef(p.first)->getParent(); + mode = 0; // Same block + } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(), + MRI.getVRegDef(p.second)->getParent())) { + scan = MRI.getVRegDef(p.second)->getParent(); + mode = 1; // First dominates second + } else { + scan = MRI.getVRegDef(p.first)->getParent(); + mode = 2; // Second dominates first + } + + // If there's an interference, we need to insert copies + if (interferes(p.first, p.second, scan, LI, mode)) { + // Insert copies for First + for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { + if (P->getOperand(i-1).getReg() == p.first) { + unsigned SrcReg = p.first; + MachineBasicBlock* From = P->getOperand(i).getMBB(); + + Waiting[From].insert(std::make_pair(SrcReg, + P->getOperand(0).getReg())); + UsedByAnother.insert(SrcReg); + + PHIUnion.erase(SrcReg); + } + } + } + } + + // Add the renaming set for this PHI node to our overall renaming information + for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), + QE = PHIUnion.end(); QI != QE; ++QI) { + DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> " + << P->getOperand(0).getReg() << "\n"); + } + + RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); + + // Remember which registers are already renamed, so that we don't try to + // rename them for another PHI node in this block + for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), + E = PHIUnion.end(); I != E; ++I) + ProcessedNames.insert(I->first); + + ++P; + } +} + +/// processPHIUnion - Take a set of candidate registers to be coalesced when +/// decomposing the PHI instruction. Use the DominanceForest to remove the ones +/// that are known to interfere, and flag others that need to be checked for +/// local interferences. +void StrongPHIElimination::processPHIUnion(MachineInstr* Inst, + std::map<unsigned, MachineBasicBlock*>& PHIUnion, + std::vector<StrongPHIElimination::DomForestNode*>& DF, + std::vector<std::pair<unsigned, unsigned> >& locals) { + + std::vector<DomForestNode*> worklist(DF.begin(), DF.end()); + SmallPtrSet<DomForestNode*, 4> visited; + + // Code is still in SSA form, so we can use MRI::getVRegDef() + MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo(); + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + unsigned DestReg = Inst->getOperand(0).getReg(); + + // DF walk on the DomForest + while (!worklist.empty()) { + DomForestNode* DFNode = worklist.back(); + + visited.insert(DFNode); + + bool inserted = false; + for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end(); + CI != CE; ++CI) { + DomForestNode* child = *CI; + + // If the current node is live-out of the defining block of one of its + // children, insert a copy for it. NOTE: The paper actually calls for + // a more elaborate heuristic for determining whether to insert copies + // for the child or the parent. In the interest of simplicity, we're + // just always choosing the parent. 
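+      //
+      // Illustrative case: the parent's def dominates the child's def, but
+      // the parent is still live when the child's defining block ends;
+      // renaming both into the PHI destination would clobber the parent, so
+      // the parent gets copies instead.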
+ if (isLiveOut(DFNode->getReg(), + MRI.getVRegDef(child->getReg())->getParent(), LI)) { + // Insert copies for parent + for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) { + if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) { + unsigned SrcReg = DFNode->getReg(); + MachineBasicBlock* From = Inst->getOperand(i).getMBB(); + + Waiting[From].insert(std::make_pair(SrcReg, DestReg)); + UsedByAnother.insert(SrcReg); + + PHIUnion.erase(SrcReg); + } + } + + // If a node is live-in to the defining block of one of its children, but + // not live-out, then we need to scan that block for local interferences. + } else if (isLiveIn(DFNode->getReg(), + MRI.getVRegDef(child->getReg())->getParent(), LI) || + MRI.getVRegDef(DFNode->getReg())->getParent() == + MRI.getVRegDef(child->getReg())->getParent()) { + // Add (p, c) to possible local interferences + locals.push_back(std::make_pair(DFNode->getReg(), child->getReg())); + } + + if (!visited.count(child)) { + worklist.push_back(child); + inserted = true; + } + } + + if (!inserted) worklist.pop_back(); + } +} + +/// ScheduleCopies - Insert copies into predecessor blocks, scheduling +/// them properly so as to avoid the 'lost copy' and the 'virtual swap' +/// problems. +/// +/// Based on "Practical Improvements to the Construction and Destruction +/// of Static Single Assignment Form" by Briggs, et al. +void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, + std::set<unsigned>& pushed) { + // FIXME: This function needs to update LiveIntervals + std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB]; + + std::multimap<unsigned, unsigned> worklist; + std::map<unsigned, unsigned> map; + + // Setup worklist of initial copies + for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(), + E = copy_set.end(); I != E; ) { + map.insert(std::make_pair(I->first, I->first)); + map.insert(std::make_pair(I->second, I->second)); + + if (!UsedByAnother.count(I->second)) { + worklist.insert(*I); + + // Avoid iterator invalidation + std::multimap<unsigned, unsigned>::iterator OI = I; + ++I; + copy_set.erase(OI); + } else { + ++I; + } + } + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + MachineFunction* MF = MBB->getParent(); + MachineRegisterInfo& MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests; + + // Iterate over the worklist, inserting copies + while (!worklist.empty() || !copy_set.empty()) { + while (!worklist.empty()) { + std::multimap<unsigned, unsigned>::iterator WI = worklist.begin(); + std::pair<unsigned, unsigned> curr = *WI; + worklist.erase(WI); + + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first); + + if (isLiveOut(curr.second, MBB, LI)) { + // Create a temporary + unsigned t = MF->getRegInfo().createVirtualRegister(RC); + + // Insert copy from curr.second to a temporary at + // the Phi defining curr.second + MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); + BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY), + t).addReg(curr.second); + DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t + << "\n"); + + // Push temporary on Stacks + Stacks[curr.second].push_back(t); + + // Insert curr.second in pushed + pushed.insert(curr.second); + + // Create a live interval for this temporary + InsertedPHIDests.push_back(std::make_pair(t, --PI)); + } + + // Insert copy from map[curr.first] to curr.second + BuildMI(*MBB, MBB->getFirstTerminator(), 
DebugLoc(),
+              TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
+      map[curr.first] = curr.second;
+      DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
+                   << curr.second << "\n");
+
+      // Push this copy onto InsertedPHIDests so we can
+      // update LiveIntervals with it.
+      MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
+      InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
+
+      // If curr.first is a destination in copy_set...
+      for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+           E = copy_set.end(); I != E; )
+        if (curr.first == I->second) {
+          std::pair<unsigned, unsigned> temp = *I;
+          worklist.insert(temp);
+
+          // Avoid iterator invalidation
+          std::multimap<unsigned, unsigned>::iterator OI = I;
+          ++I;
+          copy_set.erase(OI);
+
+          break;
+        } else {
+          ++I;
+        }
+    }
+
+    if (!copy_set.empty()) {
+      std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
+      std::pair<unsigned, unsigned> curr = *CI;
+      worklist.insert(curr);
+      copy_set.erase(CI);
+
+      LiveInterval& I = LI.getInterval(curr.second);
+      MachineBasicBlock::iterator term = MBB->getFirstTerminator();
+      SlotIndex endIdx = SlotIndex();
+      if (term != MBB->end())
+        endIdx = LI.getInstructionIndex(term);
+      else
+        endIdx = LI.getMBBEndIdx(MBB);
+
+      if (I.liveAt(endIdx)) {
+        const TargetRegisterClass *RC =
+                                      MF->getRegInfo().getRegClass(curr.first);
+
+        // Insert a copy from dest to a new temporary t at the end of b
+        unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+        BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
+                TII->get(TargetOpcode::COPY), t).addReg(curr.second);
+        map[curr.second] = t;
+
+        MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
+        InsertedPHIDests.push_back(std::make_pair(t, --TI));
+      }
+    }
+  }
+
+  // Renumber the instructions so that we can perform the index computations
+  // needed to create new live intervals.
+  LI.renumber();
+
+  // For copies that we inserted at the ends of predecessors, we construct
+  // live intervals. This is pretty easy, since we know that the destination
+  // register cannot have been live at that point previously. We just have
+  // to make sure that, for registers that serve as inputs to more than one
+  // PHI, we don't create multiple overlapping live intervals.
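+  // (E.g. a register that is an input to two PHIs gets two entries in
+  // InsertedPHIDests; RegHandled below ensures only one interval is created
+  // for it.)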
+ std::set<unsigned> RegHandled; + for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I = + InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { + if (RegHandled.insert(I->first).second) { + LiveInterval& Int = LI.getOrCreateInterval(I->first); + SlotIndex instrIdx = LI.getInstructionIndex(I->second); + if (Int.liveAt(instrIdx.getDefIndex())) + Int.removeRange(instrIdx.getDefIndex(), + LI.getMBBEndIdx(I->second->getParent()).getNextSlot(), + true); + + LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); + R.valno->setCopy(I->second); + R.valno->def = LI.getInstructionIndex(I->second).getDefIndex(); + } + } +} + +/// InsertCopies - insert copies into MBB and all of its successors +void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, + SmallPtrSet<MachineBasicBlock*, 16>& visited) { + MachineBasicBlock* MBB = MDTN->getBlock(); + visited.insert(MBB); + + std::set<unsigned> pushed; + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + // Rewrite register uses from Stacks + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->isPHI()) + continue; + + for (unsigned i = 0; i < I->getNumOperands(); ++i) + if (I->getOperand(i).isReg() && + Stacks[I->getOperand(i).getReg()].size()) { + // Remove the live range for the old vreg. + LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); + LiveInterval::iterator OldLR = + OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex()); + if (OldLR != OldInt.end()) + OldInt.removeRange(*OldLR, true); + + // Change the register + I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back()); + + // Add a live range for the new vreg + LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); + VNInfo* FirstVN = *Int.vni_begin(); + FirstVN->setHasPHIKill(false); + LiveRange LR (LI.getMBBStartIdx(I->getParent()), + LI.getInstructionIndex(I).getUseIndex().getNextSlot(), + FirstVN); + + Int.addRange(LR); + } + } + + // Schedule the copies for this block + ScheduleCopies(MBB, pushed); + + // Recur down the dominator tree. 
+ for (MachineDomTreeNode::iterator I = MDTN->begin(), + E = MDTN->end(); I != E; ++I) + if (!visited.count((*I)->getBlock())) + InsertCopies(*I, visited); + + // As we exit this block, pop the names we pushed while processing it + for (std::set<unsigned>::iterator I = pushed.begin(), + E = pushed.end(); I != E; ++I) + Stacks[*I].pop_back(); +} + +bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, + unsigned secondary) { + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + LiveInterval& LHS = LI.getOrCreateInterval(primary); + LiveInterval& RHS = LI.getOrCreateInterval(secondary); + + LI.renumber(); + + DenseMap<VNInfo*, VNInfo*> VNMap; + for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + LiveRange R = *I; + + SlotIndex Start = R.start; + SlotIndex End = R.end; + if (LHS.getLiveRangeContaining(Start)) + return false; + + if (LHS.getLiveRangeContaining(End)) + return false; + + LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R); + if (RI != LHS.end() && RI->start < End) + return false; + } + + for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + LiveRange R = *I; + VNInfo* OldVN = R.valno; + VNInfo*& NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator()); + } + + LiveRange LR (R.start, R.end, NewVN); + LHS.addRange(LR); + } + + LI.removeInterval(RHS.reg); + + return true; +} + +bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + + // Compute DFS numbers of each block + computeDFS(Fn); + + // Determine which phi node operands need copies + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (!I->empty() && I->begin()->isPHI()) + processBlock(I); + + // Break interferences where two different phis want to coalesce + // in the same register. + std::set<unsigned> seen; + typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > + RenameSetType; + for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); + I != E; ++I) { + for (std::map<unsigned, MachineBasicBlock*>::iterator + OI = I->second.begin(), OE = I->second.end(); OI != OE; ) { + if (!seen.count(OI->first)) { + seen.insert(OI->first); + ++OI; + } else { + Waiting[OI->second].insert(std::make_pair(OI->first, I->first)); + unsigned reg = OI->first; + ++OI; + I->second.erase(reg); + DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first + << "\n"); + } + } + } + + // Insert copies + // FIXME: This process should probably preserve LiveIntervals + SmallPtrSet<MachineBasicBlock*, 16> visited; + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + InsertCopies(MDT.getRootNode(), visited); + + // Perform renaming + for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); + I != E; ++I) + while (I->second.size()) { + std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); + + DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); + + if (SI->first != I->first) { + if (mergeLiveIntervals(I->first, SI->first)) { + Fn.getRegInfo().replaceRegWith(SI->first, I->first); + + if (RenameSets.count(SI->first)) { + I->second.insert(RenameSets[SI->first].begin(), + RenameSets[SI->first].end()); + RenameSets.erase(SI->first); + } + } else { + // Insert a last-minute copy if a conflict was detected. 
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), I->first).addReg(SI->first); + + LI.renumber(); + + LiveInterval& Int = LI.getOrCreateInterval(I->first); + SlotIndex instrIdx = + LI.getInstructionIndex(--SI->second->getFirstTerminator()); + if (Int.liveAt(instrIdx.getDefIndex())) + Int.removeRange(instrIdx.getDefIndex(), + LI.getMBBEndIdx(SI->second).getNextSlot(), true); + + LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, + --SI->second->getFirstTerminator()); + R.valno->setCopy(--SI->second->getFirstTerminator()); + R.valno->def = instrIdx.getDefIndex(); + + DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> " + << I->first << "\n"); + } + } + + LiveInterval& Int = LI.getOrCreateInterval(I->first); + const LiveRange* LR = + Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second)); + LR->valno->setHasPHIKill(true); + + I->second.erase(SI->first); + } + + // Remove PHIs + std::vector<MachineInstr*> phis; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); + BI != BE; ++BI) + if (BI->isPHI()) + phis.push_back(BI); + } + + for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end(); + I != E; ) { + MachineInstr* PInstr = *(I++); + + // If this is a dead PHI node, then remove it from LiveIntervals. + unsigned DestReg = PInstr->getOperand(0).getReg(); + LiveInterval& PI = LI.getInterval(DestReg); + if (PInstr->registerDefIsDead(DestReg)) { + if (PI.containsOneValue()) { + LI.removeInterval(DestReg); + } else { + SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex(); + PI.removeRange(*PI.getLiveRangeContaining(idx), true); + } + } else { + // Trim live intervals of input registers. They are no longer live into + // this block if they died after the PHI. If they lived after it, don't + // trim them because they might have other legitimate uses. + for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) { + unsigned reg = PInstr->getOperand(i).getReg(); + + MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB(); + LiveInterval& InputI = LI.getInterval(reg); + if (MBB != PInstr->getParent() && + InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && + InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex())) + InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), + LI.getInstructionIndex(PInstr), + true); + } + + // If the PHI is not dead, then the valno defined by the PHI + // now has an unknown def. + SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex(); + const LiveRange* PLR = PI.getLiveRangeContaining(idx); + PLR->valno->setIsPHIDef(true); + LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), + PLR->start, PLR->valno); + PI.addRange(R); + } + + LI.RemoveMachineInstrFromMaps(PInstr); + PInstr->eraseFromParent(); + } + + LI.renumber(); + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp new file mode 100644 index 0000000..a815b36 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -0,0 +1,653 @@ +//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass duplicates basic blocks ending in unconditional branches into +// the tails of their predecessors. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tailduplication" +#include "llvm/Function.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumTails , "Number of tails duplicated"); +STATISTIC(NumTailDups , "Number of tail duplicated blocks"); +STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); + +// Heuristic for tail duplication. +static cl::opt<unsigned> +TailDuplicateSize("tail-dup-size", + cl::desc("Maximum instructions to consider tail duplicating"), + cl::init(2), cl::Hidden); + +static cl::opt<bool> +TailDupVerify("tail-dup-verify", + cl::desc("Verify sanity of PHI instructions during taildup"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> +TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); + +typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; + +namespace { + /// TailDuplicatePass - Perform tail duplication. + class TailDuplicatePass : public MachineFunctionPass { + bool PreRegAlloc; + const TargetInstrInfo *TII; + MachineModuleInfo *MMI; + MachineRegisterInfo *MRI; + + // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + SmallVector<unsigned, 16> SSAUpdateVRs; + + // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of + // source virtual registers. 
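+    // For example, once TailBB is duplicated into predecessors P1 and P2, a
+    // def of %v in TailBB gets the entries {(P1, %v1), (P2, %v2)} here, and
+    // uses of %v outside the duplicated blocks are later rewritten through
+    // MachineSSAUpdater (names illustrative).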
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; + + public: + static char ID; + explicit TailDuplicatePass(bool PreRA) : + MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Tail Duplication"; } + + private: + void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies); + void DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap); + void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallSetVector<MachineBasicBlock*, 8> &Succs); + bool TailDuplicateBlocks(MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies); + void RemoveDeadBlock(MachineBasicBlock *MBB); + }; + + char TailDuplicatePass::ID = 0; +} + +FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { + return new TailDuplicatePass(PreRegAlloc); +} + +bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + + bool MadeChange = false; + while (TailDuplicateBlocks(MF)) + MadeChange = true; + + return MadeChange; +} + +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (!MI->isPHI()) + break; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + // This is not a hard error. + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + << ": " << *MI; + dbgs() << " extra input from predecessor BB#" + << PHIBB->getNumber() << '\n'; + } + if (PHIBB->getNumber() < 0) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + ++MI; + } + } +} + +/// TailDuplicateBlocks - Look for small blocks that are unconditionally +/// branched to and do not fall through. Tail-duplicate their instructions +/// into their predecessors to eliminate (dynamic) branches. 
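+/// For instance, a small block that ends in an unconditional branch and is
+/// reached only by branches can be copied into each predecessor, letting
+/// every predecessor reach the block's successors without the extra jump.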
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + if (PreRegAlloc && TailDupVerify) { + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + SmallVector<MachineInstr*, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + + if (NumTails == TailDupLimit) + break; + + // Only duplicate blocks that end with unconditional branches. + if (MBB->canFallThrough()) + continue; + + // Save the successors list. + SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock*, 8> TDBBs; + SmallVector<MachineInstr*, 16> Copies; + if (TailDuplicate(MBB, MF, TDBBs, Copies)) { + ++NumTails; + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = 0; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI->getParent() == DefBB) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted by tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. 
+ MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); + MadeChange = true; + } + } + + return MadeChange; +} + +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i+1).getMBB() == SrcBB) + return i; + return 0; +} + +/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for +/// SSA update. +void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. +/// Remember the source register that's contributed by PredBB and update SSA +/// update map. +void TailDuplicatePass::ProcessPHI(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. + unsigned NewDef = MRI->createVirtualRegister(RC); + Copies.push_back(std::make_pair(NewDef, SrcReg)); + if (isDefLiveOut(DefReg, TailBB, MRI)) + AddSSAUpdateEntry(DefReg, NewDef, PredBB); + + // Remove PredBB from the PHI node. + MI->RemoveOperand(SrcOpIdx+1); + MI->RemoveOperand(SrcOpIdx); + if (MI->getNumOperands() == 1) + MI->eraseFromParent(); +} + +/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// the source operands due to earlier PHI translation. 
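+///
+/// A hypothetical example: if TailBB contains "%vreg8 = LOAD %vreg2" and the
+/// preceding PHI step mapped %vreg2 to PredBB's value %vreg5, the clone
+/// placed at the end of PredBB reads "%vreg9 = LOAD %vreg5"; the {8 -> 9}
+/// mapping goes into LocalVRMap, and into the SSA-update worklist when
+/// %vreg8 is live out of TailBB.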
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+                                     MachineBasicBlock *TailBB,
+                                     MachineBasicBlock *PredBB,
+                                     MachineFunction &MF,
+                                     DenseMap<unsigned, unsigned> &LocalVRMap) {
+  MachineInstr *NewMI = TII->duplicate(MI, MF);
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (MO.isDef()) {
+      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+      unsigned NewReg = MRI->createVirtualRegister(RC);
+      MO.setReg(NewReg);
+      LocalVRMap.insert(std::make_pair(Reg, NewReg));
+      if (isDefLiveOut(Reg, TailBB, MRI))
+        AddSSAUpdateEntry(Reg, NewReg, PredBB);
+    } else {
+      DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+      if (VI != LocalVRMap.end())
+        MO.setReg(VI->second);
+    }
+  }
+  PredBB->insert(PredBB->end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
+  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+         SE = Succs.end(); SI != SE; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+         II != EE; ++II) {
+      if (!II->isPHI())
+        break;
+      unsigned Idx = 0;
+      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+        MachineOperand &MO = II->getOperand(i+1);
+        if (MO.getMBB() == FromBB) {
+          Idx = i;
+          break;
+        }
+      }
+
+      assert(Idx != 0);
+      MachineOperand &MO0 = II->getOperand(Idx);
+      unsigned Reg = MO0.getReg();
+      if (isDead) {
+        // Folded into the previous BB.
+        // There could be duplicate phi source entries. FIXME: Should sdisel
+        // or an earlier pass have fixed this?
+        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+          MachineOperand &MO = II->getOperand(i+1);
+          if (MO.getMBB() == FromBB) {
+            II->RemoveOperand(i+1);
+            II->RemoveOperand(i);
+          }
+        }
+      } else
+        Idx = 0;
+
+      // If Idx is set, the operands at Idx and Idx+1 must be removed.
+      // We reuse the location to avoid expensive RemoveOperand calls.
+
+      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
+      if (LI != SSAUpdateVals.end()) {
+        // This register is defined in the tail block.
+        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = LI->second[j].first;
+          unsigned SrcReg = LI->second[j].second;
+          if (Idx != 0) {
+            II->getOperand(Idx).setReg(SrcReg);
+            II->getOperand(Idx+1).setMBB(SrcBB);
+            Idx = 0;
+          } else {
+            II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+            II->addOperand(MachineOperand::CreateMBB(SrcBB));
+          }
+        }
+      } else {
+        // Live in tail block, must also be live in predecessors.
+        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = TDBBs[j];
+          if (Idx != 0) {
+            II->getOperand(Idx).setReg(Reg);
+            II->getOperand(Idx+1).setMBB(SrcBB);
+            Idx = 0;
+          } else {
+            II->addOperand(MachineOperand::CreateReg(Reg, false));
+            II->addOperand(MachineOperand::CreateMBB(SrcBB));
+          }
+        }
+      }
+      if (Idx != 0) {
+        II->RemoveOperand(Idx+1);
+        II->RemoveOperand(Idx);
+      }
+    }
+  }
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
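+///
+/// A typical motivating case (illustrative): a threaded interpreter's shared
+/// dispatch block ends in an indirect branch and is reached by unconditional
+/// branches from every opcode handler; duplicating it into each handler gives
+/// every copy its own, far more predictable, indirect-branch history.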
+bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies) { + // Set the limit on the number of instructions to duplicate, with a default + // of one less than the tail-merge threshold. When optimizing for size, + // duplicate only one, because one branch instruction can be eliminated to + // compensate for the duplication. + unsigned MaxDuplicateCount; + if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + MaxDuplicateCount = 1; + else + MaxDuplicateCount = TailDuplicateSize; + + if (PreRegAlloc) { + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches. + if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch()) + return false; + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. + MaxDuplicateCount = 20; + } + + // Don't try to tail-duplicate single-block loops. + if (TailBB->isSuccessor(TailBB)) + return false; + + // Check the instructions in the block to determine whether tail-duplication + // is invalid or unlikely to be profitable. + unsigned InstrCount = 0; + bool HasCall = false; + for (MachineBasicBlock::iterator I = TailBB->begin(); + I != TailBB->end(); ++I) { + // Non-duplicable things shouldn't be tail-duplicated. + if (I->getDesc().isNotDuplicable()) return false; + // Do not duplicate 'return' instructions if this is a pre-regalloc run. + // A return may expand into a lot more instructions (e.g. reload of callee + // saved registers) after PEI. + if (PreRegAlloc && I->getDesc().isReturn()) return false; + // Don't duplicate more than the threshold. + if (InstrCount == MaxDuplicateCount) return false; + // Remember if we saw a call. + if (I->getDesc().isCall()) HasCall = true; + if (!I->isPHI() && !I->isDebugValue()) + InstrCount += 1; + } + // Heuristically, don't tail-duplicate calls if it would expand code size, + // as it's less likely to be worth the extra cost. + if (InstrCount > 1 && HasCall) + return false; + + DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. + bool Changed = false; + SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + if (PredBB->succ_size() > 1) continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). 
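+    // (When PredBB sits directly above TailBB in layout and reaches it by
+    // falling through, there is no branch to eliminate, so duplication would
+    // only grow the code.)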
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + continue; + + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + TDBBs.push_back(PredBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + + // Clone the contents of TailBB into PredBB. + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + while (I != TailBB->end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); + } + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch + + // Update the CFG. + PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); + + Changed = true; + ++NumTailDups; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector<MachineOperand, 4> PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && + TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB + << "From MBB: " << *TailBB); + if (PreRegAlloc) { + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); + if (MI->getParent()) + MI->eraseFromParent(); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); + MI->eraseFromParent(); + } + MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); + } + } else { + // No PHIs to worry about, just splice the instructions over. 
+      PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+    }
+    PrevBB->removeSuccessor(PrevBB->succ_begin());
+    assert(PrevBB->succ_empty());
+    PrevBB->transferSuccessors(TailBB);
+    TDBBs.push_back(PrevBB);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+  // Remove all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // Remove the block.
+  MBB->eraseFromParent();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 0000000..6e4a0d8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,421 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class, which just provides
+// default implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                             MachineBasicBlock *NewDest) const {
+  MachineBasicBlock *MBB = Tail->getParent();
+
+  // Remove all the old successors of MBB from the CFG.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_begin());
+
+  // Remove all the dead instructions from the end of MBB.
+  MBB->erase(Tail, MBB->end());
+
+  // If NewDest isn't immediately after MBB, insert a branch to it.
+  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+    InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+                 Tail->getDebugLoc());
+  MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+                                                      bool NewMI) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  bool HasDef = TID.getNumDefs();
+  if (HasDef && !MI->getOperand(0).isReg())
+    // No idea how to commute this instruction. Target should implement its own.
+    return 0;
+  unsigned Idx1, Idx2;
+  if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Don't know how to commute: " << *MI;
+    report_fatal_error(Msg.str());
+  }
+
+  assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+         "This only knows how to commute register operands so far");
+  unsigned Reg1 = MI->getOperand(Idx1).getReg();
+  unsigned Reg2 = MI->getOperand(Idx2).getReg();
+  bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+  bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+  bool ChangeReg0 = false;
+  if (HasDef && MI->getOperand(0).getReg() == Reg1) {
+    // Must be two address instruction!
+    assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+           "Expecting a two-address instruction!");
+    Reg2IsKill = false;
+    ChangeReg0 = true;
+  }
+
+  if (NewMI) {
+    // Create a new instruction.
+    unsigned Reg0 = HasDef
+      ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
+    bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
+    MachineFunction &MF = *MI->getParent()->getParent();
+    if (HasDef)
+      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+        .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+    else
+      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+        .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+  }
+
+  if (ChangeReg0)
+    MI->getOperand(0).setReg(Reg2);
+  MI->getOperand(Idx2).setReg(Reg1);
+  MI->getOperand(Idx1).setReg(Reg2);
+  MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+  MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+  return MI;
+}
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
+                                                unsigned &SrcOpIdx1,
+                                                unsigned &SrcOpIdx2) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isCommutable())
+    return false;
+  // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+  // is not true, then the target must implement this.
+  SrcOpIdx1 = TID.getNumDefs();
+  SrcOpIdx2 = SrcOpIdx1 + 1;
+  if (!MI->getOperand(SrcOpIdx1).isReg() ||
+      !MI->getOperand(SrcOpIdx2).isReg())
+    // No idea.
+    return false;
+  return true;
+}
+
+
+bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const {
+  bool MadeChange = false;
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isPredicable())
+    return false;
+
+  for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    if (TID.OpInfo[i].isPredicate()) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg()) {
+        MO.setReg(Pred[j].getReg());
+        MadeChange = true;
+      } else if (MO.isImm()) {
+        MO.setImm(Pred[j].getImm());
+        MadeChange = true;
+      } else if (MO.isMBB()) {
+        MO.setMBB(Pred[j].getMBB());
+        MadeChange = true;
+      }
+      ++j;
+    }
+  }
+  return MadeChange;
+}
+
+void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        unsigned DestReg,
+                                        unsigned SubIdx,
+                                        const MachineInstr *Orig,
+                                        const TargetRegisterInfo &TRI) const {
+  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+  MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+  MBB.insert(I, MI);
+}
+
+bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+                                           const MachineInstr *MI1) const {
+  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
+                                             MachineFunction &MF) const {
+  assert(!Orig->getDesc().isNotDuplicable() &&
+         "Instruction cannot be duplicated");
+  return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+                                              unsigned FoldIdx) {
+  assert(MI->isCopy() && "MI must be a COPY instruction");
+  if (MI->getNumOperands() != 2)
+    return 0;
+  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+  const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+  const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+  if (FoldOp.getSubReg() || LiveOp.getSubReg())
+    return 0;
+
+  unsigned FoldReg = FoldOp.getReg();
+  unsigned LiveReg = LiveOp.getReg();
+
+  assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+         "Cannot fold physregs");
+
+  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+  if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+    return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+  const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg);
+  if (RC == LiveRC || RC->hasSubClass(LiveRC))
+    return RC;
+
+  // FIXME: Allow folding when register classes are memory compatible.
+  return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+                     const SmallVectorImpl<unsigned> &Ops) const {
+  return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
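+///
+/// Illustrative example (x86-flavored, not from this file): with %vreg7 kept
+/// in stack slot FI, folding the load into "%vreg3 = ADD32rr %vreg3, %vreg7"
+/// would produce "%vreg3 = ADD32rm %vreg3, <fi#FI>", with a MachineMemOperand
+/// attached below to describe the memory access.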
+MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops, + int FI) const { + unsigned Flags = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (MI->getOperand(Ops[i]).isDef()) + Flags |= MachineMemOperand::MOStore; + else + Flags |= MachineMemOperand::MOLoad; + + MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "foldMemoryOperand needs an inserted instruction"); + MachineFunction &MF = *MBB->getParent(); + + // Ask the target to do the actual folding. + if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + // Add a memory operand, foldMemoryOperandImpl doesn't do that. + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->getDesc().mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->getDesc().mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + Flags, /*Offset=*/0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + NewMI->addMemOperand(MF, MMO); + + // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. + return MBB->insert(MI, NewMI); + } + + // Straight COPY may fold as load/store. + if (!MI->isCopy() || Ops.size() != 1) + return 0; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return 0; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); +#endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the target to do the actual folding. + MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + if (!NewMI) return 0; + + NewMI = MBB.insert(MI, NewMI); + + // Copy the memoperands from the load to the folded instruction. + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + + return NewMI; +} + +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + // A load from a fixed stack slot can be rematerialized. This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. 
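+  // For instance (hypothetical): "%vreg7 = MOV32rm <fi#-2>" reloading an
+  // incoming argument from its never-written fixed slot yields the same value
+  // wherever it executes, so it can be re-issued at each use rather than
+  // keeping %vreg7 live across a long region.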
+  int FrameIdx = 0;
+  if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+      MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+    return true;
+
+  const TargetInstrDesc &TID = MI->getDesc();
+
+  // Avoid instructions obviously unsafe for remat.
+  if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
+      TID.mayStore())
+    return false;
+
+  // Avoid instructions which load from potentially varying memory.
+  if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+    return false;
+
+  // If any of the registers accessed are non-constant, conservatively assume
+  // the instruction is not rematerializable.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    // Check for a well-behaved physical register.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        if (!MRI.def_empty(Reg))
+          return false;
+        BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
+        if (AllocatableRegs.test(Reg))
+          return false;
+        // Check for a def among the register's aliases too.
+        for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          if (!MRI.def_empty(AliasReg))
+            return false;
+          if (AllocatableRegs.test(AliasReg))
+            return false;
+        }
+      } else {
+        // A physreg def. We can't remat it.
+        return false;
+      }
+      continue;
+    }
+
+    // Only allow one virtual-register def, and that in the first operand.
+    if (MO.isDef() != (i == 0))
+      return false;
+
+    // For the def, it should be the only def of that register.
+    if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
+                       MRI.isLiveIn(Reg)))
+      return false;
+
+    // Don't allow any virtual-register uses. Rematting an instruction with
+    // virtual register uses would lengthen the live ranges of the uses, which
+    // is not necessarily a good idea, certainly not "trivial".
+    if (MO.isUse())
+      return false;
+  }
+
+  // Everything checked out.
+  return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+                                               const MachineBasicBlock *MBB,
+                                               const MachineFunction &MF) const {
+  // Terminators and labels can't be scheduled around.
+  if (MI->getDesc().isTerminator() || MI->isLabel())
+    return true;
+
+  // Don't attempt to schedule around any instruction that defines
+  // a stack-oriented pointer, as it's unlikely to be profitable. This
+  // saves compile time, because it doesn't require every single
+  // stack slot reference to depend on the instruction that does the
+  // modification.
+  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+  if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+    return true;
+
+  return false;
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
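+// (This builds a scoreboard from the target's instruction itineraries; a
+// target wanting smarter post-RA hazard detection overrides the hook.)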
+ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); +} diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 0000000..f1e10ee --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,1005 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; +using namespace dwarf; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadData()); + + TLSBSSSection = + getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadBSS()); + + DataRelSection = + getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + 
getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + EHFrameSection = + getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Debug Info Sections. 
+ DwarfAbbrevSection = + getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +static SectionKind +getELFKindForNamedSection(StringRef Name, SectionKind K) { + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. + if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return MCSectionELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return MCSectionELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return MCSectionELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return MCSectionELF::SHT_NOBITS; + + return MCSectionELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= MCSectionELF::SHF_ALLOC; + + if (K.isText()) + Flags |= MCSectionELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= MCSectionELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= MCSectionELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= MCSectionELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= MCSectionELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = 
GV->getSection();
+
+  // Infer section flags from the section name if we can.
+  Kind = getELFKindForNamedSection(SectionName, Kind);
+
+  return getContext().getELFSection(SectionName,
+                                    getELFSectionType(SectionName, Kind),
+                                    getELFSectionFlags(Kind), Kind, true);
+}
+
+static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
+  if (Kind.isText()) return ".gnu.linkonce.t.";
+  if (Kind.isReadOnly()) return ".gnu.linkonce.r.";
+
+  if (Kind.isThreadData()) return ".gnu.linkonce.td.";
+  if (Kind.isThreadBSS()) return ".gnu.linkonce.tb.";
+
+  if (Kind.isDataNoRel()) return ".gnu.linkonce.d.";
+  if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local.";
+  if (Kind.isDataRel()) return ".gnu.linkonce.d.rel.";
+  if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
+
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return ".gnu.linkonce.d.rel.ro.";
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+  if (Kind.isText()) return ".text.";
+  if (Kind.isReadOnly()) return ".rodata.";
+
+  if (Kind.isThreadData()) return ".tdata.";
+  if (Kind.isThreadBSS()) return ".tbss.";
+
+  if (Kind.isDataNoRel()) return ".data.";
+  if (Kind.isDataRelLocal()) return ".data.rel.local.";
+  if (Kind.isDataRel()) return ".data.rel.";
+  if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return ".data.rel.ro.";
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                       Mangler *Mang, const TargetMachine &TM) const {
+  // If we have -ffunction-sections or -fdata-sections then we should emit the
+  // global value to a uniqued section specifically for it.
+  bool EmitUniquedSection;
+  if (Kind.isText())
+    EmitUniquedSection = TM.getFunctionSections();
+  else
+    EmitUniquedSection = TM.getDataSections();
+
+  // If this global is linkonce/weak and the target handles this by emitting it
+  // into a 'uniqued' section name, create and return the section now.
+  if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+      !Kind.isCommon() && !Kind.isBSS()) {
+    const char *Prefix;
+    if (GV->isWeakForLinker())
+      Prefix = getSectionPrefixForUniqueGlobal(Kind);
+    else {
+      assert(EmitUniquedSection);
+      Prefix = getSectionPrefixForGlobal(Kind);
+    }
+
+    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
+    return getContext().getELFSection(Name.str(),
+                                      getELFSectionType(Name.str(), Kind),
+                                      getELFSectionFlags(Kind), Kind);
+  }
+
+  if (Kind.isText()) return TextSection;
+
+  if (Kind.isMergeable1ByteCString() ||
+      Kind.isMergeable2ByteCString() ||
+      Kind.isMergeable4ByteCString()) {
+
+    // We also need alignment here.
+    // FIXME: this is getting the alignment of the character, not the
+    // alignment of the global!
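+    // For example (illustrative): a mergeable 2-byte string constant whose
+    // preferred alignment works out to 2 is placed in ".rodata.str2.2",
+    // assembled below as SizeSpec + utostr(Align).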
+ unsigned Align = + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. + if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +const MCExpr *TargetLoweringObjectFileELF:: +getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + unsigned Encoding, MCStreamer &Streamer) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
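+    // (Illustrative: an indirect EH reference to global "foo" is emitted
+    // against the synthesized symbol "foo.DW.stub", whose slot the stub
+    // emission fills with foo's address.)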
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return TargetLoweringObjectFile:: + getExprForDwarfReference(SSym, Mang, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + } + + return TargetLoweringObjectFile:: + getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + // _foo.eh symbols are currently always exported so that the linker knows + // about them. This is not necessary on 10.6 and later, but it + // doesn't hurt anything. + // FIXME: I need to get this from Triple. + IsFunctionEHSymbolGlobal = true; + IsFunctionEHFrameSymbolPrivate = false; + SupportsWeakOmittedEHFrame = false; + + TargetLoweringObjectFile::Initialize(Ctx, TM); + + TextSection // .text + = getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = getContext().getMachOSection("__DATA", "__data", 0, + SectionKind::getDataRel()); + + TLSDataSection // .tdata + = getContext().getMachOSection("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR, + SectionKind::getDataRel()); + TLSBSSSection // .tbss + = getContext().getMachOSection("__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + SectionKind::getThreadBSS()); + + // TODO: Verify datarel below. + TLSTLVSection // .tlv + = getContext().getMachOSection("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES, + SectionKind::getDataRel()); + + TLSThreadInitSection + = getContext().getMachOSection("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, + SectionKind::getDataRel()); + + CStringSection // .cstring + = getContext().getMachOSection("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getContext().getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getContext().getMachOSection("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getContext().getMachOSection("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. 
+ SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getContext().getMachOSection("__TEXT", "__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getContext().getMachOSection("__TEXT", "__const", 0, + SectionKind::getReadOnly()); + + TextCoalSection + = getContext().getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getContext().getMachOSection("__TEXT", "__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getReadOnly()); + ConstDataSection // .const_data + = getContext().getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getContext().getMachOSection("__DATA","__datacoal_nt", + MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = getContext().getMachOSection("__DATA","__common", + MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = getContext().getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getContext().getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getContext().getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getDataRel()); + StaticDtorSection + = getContext().getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getDataRel()); + } else { + StaticCtorSection + = getContext().getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getContext().getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); + EHFrameSection = + getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + + // Debug Information. 
+ DwarfAbbrevSection = + getContext().getMachOSection("__DWARF", "__debug_abbrev", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getContext().getMachOSection("__DWARF", "__debug_info", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getContext().getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getContext().getMachOSection("__DWARF", "__debug_frame", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getContext().getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getContext().getMachOSection("__DWARF", "__debug_pubtypes", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getContext().getMachOSection("__DWARF", "__debug_str", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getContext().getMachOSection("__DWARF", "__debug_loc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getContext().getMachOSection("__DWARF", "__debug_aranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getContext().getMachOSection("__DWARF", "__debug_ranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getContext().getMachOSection("__DWARF", "__debug_macinfo", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getContext().getMachOSection("__DWARF", "__debug_inlined", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + + TLSExtraDataSection = TLSTLVSection; +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + // Get the section. + const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + + if (Kind.isText()) + return GV->isWeakForLinker() ? 
TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. + if (Kind.isMergeable1ByteCString() && + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return CStringSection; + + // Do not put 16-bit arrays in the UString section if they have an + // externally visible label, this runs into issues with certain linker + // versions. + if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return UStringSection; + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. +// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. 
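+    // (For instance: internal ObjC metadata such as "L_OBJC_METH_VAR_NAME_"
+    // is skipped here, so no .no_dead_strip directive is emitted for it,
+    // while other internal globals named in llvm.used still get one.)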
+ MCSymbol *Sym = Mang->getSymbol(GV); + if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const { + // The mach-o version of this method defaults to returning a stub reference. + + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return TargetLoweringObjectFile:: + getExprForDwarfReference(SSym, Mang, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + } + + return TargetLoweringObjectFile:: + getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); +} + +unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { + return DW_EH_PE_pcrel; +} + +unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { + return DW_EH_PE_pcrel; +} + +unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + TextSection = + getContext().getCOFFSection(".text", + COFF::IMAGE_SCN_CNT_CODE | + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getText()); + DataSection = + getContext().getCOFFSection(".data", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + ReadOnlySection = + getContext().getCOFFSection(".rdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + StaticCtorSection = + getContext().getCOFFSection(".ctors", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getCOFFSection(".dtors", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. 
+ LSDASection = + getContext().getCOFFSection(".gcc_except_table", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + EHFrameSection = + getContext().getCOFFSection(".eh_frame", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + + // Debug info. + DwarfAbbrevSection = + getContext().getCOFFSection(".debug_abbrev", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfInfoSection = + getContext().getCOFFSection(".debug_info", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLineSection = + getContext().getCOFFSection(".debug_line", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfFrameSection = + getContext().getCOFFSection(".debug_frame", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getContext().getCOFFSection(".debug_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getContext().getCOFFSection(".debug_pubtypes", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfStrSection = + getContext().getCOFFSection(".debug_str", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLocSection = + getContext().getCOFFSection(".debug_loc", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfARangesSection = + getContext().getCOFFSection(".debug_aranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfRangesSection = + getContext().getCOFFSection(".debug_ranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getContext().getCOFFSection(".debug_macinfo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + + DrectveSection = + getContext().getCOFFSection(".drectve", + COFF::IMAGE_SCN_LNK_INFO, + SectionKind::getMetadata()); +} + +static unsigned +getCOFFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (K.isMetadata()) + Flags |= + COFF::IMAGE_SCN_MEM_DISCARDABLE; + else if (K.isText()) + Flags |= + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_CNT_CODE; + else if (K.isBSS ()) + Flags |= + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isReadOnly()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ; + else if (K.isWriteable()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + return Flags; +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getContext().getCOFFSection(GV->getSection(), + getCOFFSectionFlags(Kind), + Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$linkonce"; + if (Kind.isBSS ()) + return ".bss$linkonce"; + if (Kind.isWriteable()) + return ".data$linkonce"; + return ".rdata$linkonce"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: 
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                       Mangler *Mang, const TargetMachine &TM) const {
+  assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+  // If this global is linkonce/weak and the target handles this by emitting it
+  // into a 'uniqued' section name, create and return the section now.
+  if (GV->isWeakForLinker()) {
+    const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
+
+    unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+    Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+    return getContext().getCOFFSection(Name.str(), Characteristics,
+                                       COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH,
+                                       Kind);
+  }
+
+  if (Kind.isText())
+    return getTextSection();
+
+  return getDataSection();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..78989c5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1498 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators.  Two-Address instructions are rewritten
+// from:
+//
+//     A = B op C
+//
+// to:
+//
+//     A = B
+//     A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to
+// be capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
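+//
+// As an editorial sketch (not part of the original file): for a tied operand
+// pair (SrcIdx, DstIdx) on an instruction 'mi', the rewrite this pass performs
+// amounts to inserting a COPY and redirecting the tied use, much as
+// runOnMachineFunction does below:
+//
+//   unsigned regA = mi->getOperand(DstIdx).getReg();    // A
+//   unsigned regB = mi->getOperand(SrcIdx).getReg();    // B
+//   BuildMI(*mbbi, mi, mi->getDebugLoc(),
+//           TII->get(TargetOpcode::COPY), regA).addReg(regB);  // A = B
+//   mi->getOperand(SrcIdx).setReg(regA);                // now: A op= C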
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted        , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted    , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk,        "Number of 3-address instructions sunk");
+STATISTIC(NumReMats,           "Number of instructions re-materialized");
+STATISTIC(NumDeletes,          "Number of dead instructions deleted");
+
+namespace {
+  class TwoAddressInstructionPass : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+    LiveVariables *LV;
+    AliasAnalysis *AA;
+
+    // DistanceMap - Keep track of the distance of an MI from the start of the
+    // current basic block.
+    DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+    // SrcRegMap - A map from virtual registers to physical registers which
+    // are likely targets to be coalesced to due to copies from physical
+    // registers to virtual registers.  e.g. v1024 = move r0.
+    DenseMap<unsigned, unsigned> SrcRegMap;
+
+    // DstRegMap - A map from virtual registers to physical registers which
+    // are likely targets to be coalesced to due to copies to physical
+    // registers from virtual registers.  e.g. r1 = move v1024.
+    DenseMap<unsigned, unsigned> DstRegMap;
+
+    /// RegSequences - Keep track of the list of REG_SEQUENCE instructions seen
+    /// during the initial walk of the machine function.
+ SmallVector<MachineInstr*, 16> RegSequences; + + bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI, + unsigned Reg, + MachineBasicBlock::iterator OldPos); + + bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, + MachineInstr *MI, MachineInstr *DefMI, + MachineBasicBlock *MBB, unsigned Loc); + + bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, + unsigned &LastDef); + + MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, + unsigned Dist); + + bool isProfitableToCommute(unsigned regB, unsigned regC, + MachineInstr *MI, MachineBasicBlock *MBB, + unsigned Dist); + + bool CommuteInstruction(MachineBasicBlock::iterator &mi, + MachineFunction::iterator &mbbi, + unsigned RegB, unsigned RegC, unsigned Dist); + + bool isProfitableToConv3Addr(unsigned RegA); + + bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned RegB, unsigned Dist); + + typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; + bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, + SmallVector<NewKill, 4> &NewKills, + MachineBasicBlock *MBB, unsigned Dist); + bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, unsigned Dist); + + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); + + void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &Processed); + + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); + + /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part + /// of the de-ssa process. This replaces sources of REG_SEQUENCE as + /// sub-register references of the register defined by REG_SEQUENCE. + bool EliminateRegSequences(); + + public: + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<LiveVariables>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + if (StrongPHIElim) + AU.addPreservedID(StrongPHIEliminationID); + else + AU.addPreservedID(PHIEliminationID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - Pass entry point. + bool runOnMachineFunction(MachineFunction&); + }; +} + +char TwoAddressInstructionPass::ID = 0; +INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false); + +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; + +/// Sink3AddrInstruction - A two-address instruction has been converted to a +/// three-address instruction to avoid clobbering a register. Try to sink it +/// past the instruction that would kill the above mentioned register to reduce +/// register pressure. +bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, + MachineInstr *MI, unsigned SavedReg, + MachineBasicBlock::iterator OldPos) { + // Check if it's safe to move this instruction. + bool SeenStore = true; // Be conservative. 
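+  // (Editorial note: passing SeenStore = true tells isSafeToMove to assume a
+  //  store has already been seen in the block, so it will conservatively
+  //  refuse to move memory operations across potentially aliasing stores.)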
+ if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + unsigned DefReg = 0; + SmallSet<unsigned, 4> UseRegs; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse() && MOReg != SavedReg) + UseRegs.insert(MO.getReg()); + if (!MO.isDef()) + continue; + if (MO.isImplicit()) + // Don't try to move it if it implicitly defines a register. + return false; + if (DefReg) + // For now, don't move any instructions that define multiple registers. + return false; + DefReg = MO.getReg(); + } + + // Find the instruction that kills SavedReg. + MachineInstr *KillMI = NULL; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SavedReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + if (!UseMO.isKill()) + continue; + KillMI = UseMO.getParent(); + break; + } + + if (!KillMI || KillMI->getParent() != MBB || KillMI == MI) + return false; + + // If any of the definitions are used by another instruction between the + // position and the kill use, then it's not safe to sink it. + // + // FIXME: This can be sped up if there is an easy way to query whether an + // instruction is before or after another instruction. Then we can use + // MachineRegisterInfo def / use instead. + MachineOperand *KillMO = NULL; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + + unsigned NumVisited = 0; + for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (DefReg == MOReg) + return false; + + if (MO.isKill()) { + if (OtherMI == KillMI && MOReg == SavedReg) + // Save the operand that kills the register. We want to unset the kill + // marker if we can sink MI past it. + KillMO = &MO; + else if (UseRegs.count(MOReg)) + // One of the uses is killed before the destination. + return false; + } + } + } + + // Update kill and LV information. + KillMO->setIsKill(false); + KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); + KillMO->setIsKill(true); + + if (LV) + LV->replaceKillInstruction(SavedReg, KillMI, MI); + + // Move instruction to its destination. + MBB->remove(MI); + MBB->insert(KillPos, MI); + + ++Num3AddrSunk; + return true; +} + +/// isTwoAddrUse - Return true if the specified MI is using the specified +/// register as a two-address operand. +static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { + const TargetInstrDesc &TID = UseMI->getDesc(); + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && + (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) + // Earlier use is a two-address one. + return true; + } + return false; +} + +/// isProfitableToReMat - Return true if the heuristics determines it is likely +/// to be profitable to re-materialize the definition of Reg rather than copy +/// the register. 
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+                                         const TargetRegisterClass *RC,
+                                         MachineInstr *MI, MachineInstr *DefMI,
+                                         MachineBasicBlock *MBB, unsigned Loc) {
+  bool OtherUse = false;
+  for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = UseMO.getParent();
+    MachineBasicBlock *UseMBB = UseMI->getParent();
+    if (UseMBB == MBB) {
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+      if (DI != DistanceMap.end() && DI->second == Loc)
+        continue;  // Current use.
+      OtherUse = true;
+      // There is at least one other use in the MBB that will clobber the
+      // register.
+      if (isTwoAddrUse(UseMI, Reg))
+        return true;
+    }
+  }
+
+  // If other uses in MBB are not two-address uses, then don't remat.
+  if (OtherUse)
+    return false;
+
+  // No other uses in the same block, remat if it's defined in the same
+  // block so it does not unnecessarily extend the live range.
+  return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed.  It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+                                           MachineBasicBlock *MBB, unsigned Dist,
+                                           unsigned &LastDef) {
+  LastDef = 0;
+  unsigned LastUse = Dist;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB || MI->isDebugValue())
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (MO.isUse() && DI->second < LastUse)
+      LastUse = DI->second;
+    if (MO.isDef() && DI->second > LastDef)
+      LastDef = DI->second;
+  }
+
+  return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+                                                       MachineBasicBlock *MBB,
+                                                       unsigned Dist) {
+  unsigned LastUseDist = 0;
+  MachineInstr *LastUse = 0;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB || MI->isDebugValue())
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (DI->second >= Dist)
+      continue;
+
+    if (MO.isUse() && DI->second > LastUseDist) {
+      LastUse = DI->first;
+      LastUseDist = DI->second;
+    }
+  }
+  return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// an extract_subreg instruction.  It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+                        unsigned &SrcReg, unsigned &DstReg,
+                        bool &IsSrcPhys, bool &IsDstPhys) {
+  SrcReg = 0;
+  DstReg = 0;
+  if (MI.isCopy()) {
+    DstReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(1).getReg();
+  } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+    DstReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(2).getReg();
+  } else
+    return false;
+
+  IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+  IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+  return true;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+///   %reg1034 = copy %reg1024
+///   %reg1035 = copy %reg1025<kill>
+///   %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+                     const MachineRegisterInfo *MRI,
+                     const TargetInstrInfo *TII) {
+  MachineInstr *DefMI = &MI;
+  for (;;) {
+    if (!DefMI->killsRegister(Reg))
+      return false;
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      return true;
+    MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+    // If there are multiple defs, we can't do a simple analysis, so just
+    // go with what the kill flag says.
+    if (llvm::next(Begin) != MRI->def_end())
+      return true;
+    DefMI = &*Begin;
+    bool IsSrcPhys, IsDstPhys;
+    unsigned SrcReg, DstReg;
+    // If the def is something other than a copy, then it isn't going to
+    // be coalesced, so follow the kill flag.
+    if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+      return true;
+    Reg = SrcReg;
+  }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use.  If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands();
+  for (unsigned i = 0; i != NumOps; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+      continue;
+    unsigned ti;
+    if (MI.isRegTiedToDefOperand(i, &ti)) {
+      DstReg = MI.getOperand(ti).getReg();
+      return true;
+    }
+  }
+  return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single in-basic-block
+/// use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+                                     MachineRegisterInfo *MRI,
+                                     const TargetInstrInfo *TII,
+                                     bool &IsCopy,
+                                     unsigned &DstReg, bool &IsDstPhys) {
+  if (!MRI->hasOneNonDBGUse(Reg))
+    // None or more than one use.
+    return 0;
+  MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+  if (UseMI.getParent() != MBB)
+    return 0;
+  unsigned SrcReg;
+  bool IsSrcPhys;
+  if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+    IsCopy = true;
+    return &UseMI;
+  }
+  IsDstPhys = false;
+  if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+    IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    return &UseMI;
+  }
+  return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+  while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+    if (SI == RegMap.end())
+      return 0;
+    Reg = SI->second;
+  }
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg;
+  return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+  if (RegA == RegB)
+    return true;
+  if (!RegA || !RegB)
+    return false;
+  return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+                                       MachineInstr *MI, MachineBasicBlock *MBB,
+                                       unsigned Dist) {
+  // Determine if it's profitable to commute this two address instruction. In
+  // general, we want no uses between this instruction and the definition of
+  // the two-address register.
+  // e.g.
+  // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+  // %reg1029<def> = MOV8rr %reg1028
+  // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+  // insert => %reg1030<def> = MOV8rr %reg1028
+  // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+  // In this case, it might not be possible to coalesce the second MOV8rr
+  // instruction if the first one is coalesced. So it would be profitable to
+  // commute it:
+  // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+  // %reg1029<def> = MOV8rr %reg1028
+  // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+  // insert => %reg1030<def> = MOV8rr %reg1029
+  // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+  if (!MI->killsRegister(regC))
+    return false;
+
+  // Ok, we have something like:
+  // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+  // let's see if it's worth commuting it.
+
+  // Look for situations like this:
+  // %reg1024<def> = MOV r1
+  // %reg1025<def> = MOV r0
+  // %reg1026<def> = ADD %reg1024, %reg1025
+  // r0            = MOV %reg1026
+  // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+  unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+  unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+  unsigned ToRegB = getMappedReg(regB, DstRegMap);
+  unsigned ToRegC = getMappedReg(regC, DstRegMap);
+  if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
+      (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+       regsAreCompatible(FromRegC, ToRegB, TRI)))
+    return true;
+
+  // If there is a use of regC between its last def (could be livein) and this
+  // instruction, then bail.
+  unsigned LastDefC = 0;
+  if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+    return false;
+
+  // If there is a use of regB between its last def (could be livein) and this
+  // instruction, then go ahead and make this transformation.
+  unsigned LastDefB = 0;
+  if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+    return true;
+
+  // Since there are no intervening uses for both registers, then commute
+  // if the def of regC is closer. Its live interval is shorter.
+  return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+                               MachineFunction::iterator &mbbi,
+                               unsigned RegB, unsigned RegC, unsigned Dist) {
+  MachineInstr *MI = mi;
+  DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+  MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+  if (NewMI == 0) {
+    DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+    return false;
+  }
+
+  DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+  // If commuting created a new instruction, update live variables and
+  // replace the old instruction with the new one.
+  if (NewMI != MI) {
+    if (LV)
+      // Update live variables
+      LV->replaceKillInstruction(RegC, MI, NewMI);
+
+    mbbi->insert(mi, NewMI);           // Insert the new inst
+    mbbi->erase(mi);                   // Nuke the old inst.
+    mi = NewMI;
+    DistanceMap.insert(std::make_pair(NewMI, Dist));
+  }
+
+  // Update source register map.
+  unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+  if (FromRegC) {
+    unsigned RegA = MI->getOperand(0).getReg();
+    SrcRegMap[RegA] = FromRegC;
+  }
+
+  return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+  // Look for situations like this:
+  // %reg1024<def> = MOV r1
+  // %reg1025<def> = MOV r0
+  // %reg1026<def> = ADD %reg1024, %reg1025
+  // r2            = MOV %reg1026
+  // Turn ADD into a 3-address instruction to avoid a copy.
+  unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+  unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+  return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                                              MachineBasicBlock::iterator &nmi,
+                                              MachineFunction::iterator &mbbi,
+                                              unsigned RegB, unsigned Dist) {
+  MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+  if (NewMI) {
+    DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+    DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+    bool Sunk = false;
+
+    if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+      // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+      Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+    mbbi->erase(mi); // Nuke the old inst.
+
+    if (!Sunk) {
+      DistanceMap.insert(std::make_pair(NewMI, Dist));
+      mi = NewMI;
+      nmi = llvm::next(mi);
+    }
+    return true;
+  }
+
+  return false;
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1    = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side).  v1025 is mapped to r1.  v1026 is
+/// potentially joined with r1 on the output side.  It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     SmallPtrSet<MachineInstr*, 8> &Processed) {
+  if (Processed.count(MI))
+    return;
+
+  bool IsSrcPhys, IsDstPhys;
+  unsigned SrcReg, DstReg;
+  if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+    return;
+
+  if (IsDstPhys && !IsSrcPhys)
+    DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+  else if (!IsDstPhys && IsSrcPhys) {
+    bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+    if (!isNew)
+      assert(SrcRegMap[DstReg] == SrcReg &&
+             "Can't map to two src physical registers!");
+
+    SmallVector<unsigned, 4> VirtRegPairs;
+    bool IsCopy = false;
+    unsigned NewReg = 0;
+    while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII,
+                                                   IsCopy, NewReg, IsDstPhys)) {
+      if (IsCopy) {
+        if (!Processed.insert(UseMI))
+          break;
+      }
+
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+      if (DI != DistanceMap.end())
+        // Earlier in the same MBB.  Reached via a back edge.
+        break;
+
+      if (IsDstPhys) {
+        VirtRegPairs.push_back(NewReg);
+        break;
+      }
+      bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
+      if (!isNew)
+        assert(SrcRegMap[NewReg] == DstReg &&
+               "Can't map to two src physical registers!");
+      VirtRegPairs.push_back(NewReg);
+      DstReg = NewReg;
+    }
+
+    if (!VirtRegPairs.empty()) {
+      unsigned ToReg = VirtRegPairs.back();
+      VirtRegPairs.pop_back();
+      while (!VirtRegPairs.empty()) {
+        unsigned FromReg = VirtRegPairs.back();
+        VirtRegPairs.pop_back();
+        bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+        if (!isNew)
+          assert(DstRegMap[FromReg] == ToReg &&
+                 "Can't map to two dst physical registers!");
+        ToReg = FromReg;
+      }
+    }
+  }
+
+  Processed.insert(MI);
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI,
+                           const TargetInstrInfo *TII,
+                           SmallVector<unsigned, 4> &Kills) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (TID.mayStore() || TID.isCall())
+    return false;
+  if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+    return false;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.isKill())
+      Kills.push_back(MO.getReg());
+  }
+  return true;
+}
+
+/// canUpdateDeletedKills - Check if all the registers listed in Kills are
+/// killed by instructions in MBB preceding the current instruction at
+/// position Dist.  If so, return true and record information about the
+/// preceding kills in NewKills.
+bool TwoAddressInstructionPass:: +canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, + SmallVector<NewKill, 4> &NewKills, + MachineBasicBlock *MBB, unsigned Dist) { + while (!Kills.empty()) { + unsigned Kill = Kills.back(); + Kills.pop_back(); + if (TargetRegisterInfo::isPhysicalRegister(Kill)) + return false; + + MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); + if (!LastKill) + return false; + + bool isModRef = LastKill->definesRegister(Kill); + NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), + LastKill)); + } + return true; +} + +/// DeleteUnusedInstr - If an instruction with a tied register operand can +/// be safely deleted, just delete it. +bool +TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned Dist) { + // Check if the instruction has no side effects and if all its defs are dead. + SmallVector<unsigned, 4> Kills; + if (!isSafeToDelete(mi, TII, Kills)) + return false; + + // If this instruction kills some virtual registers, we need to + // update the kill information. If it's not possible to do so, + // then bail out. + SmallVector<NewKill, 4> NewKills; + if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) + return false; + + if (LV) { + while (!NewKills.empty()) { + MachineInstr *NewKill = NewKills.back().second; + unsigned Kill = NewKills.back().first.first; + bool isDead = NewKills.back().first.second; + NewKills.pop_back(); + if (LV->removeVirtualRegisterKilled(Kill, mi)) { + if (isDead) + LV->addVirtualRegisterDead(Kill, NewKill); + else + LV->addVirtualRegisterKilled(Kill, NewKill); + } + } + } + + mbbi->erase(mi); // Nuke the old inst. + mi = nmi; + return true; +} + +/// TryInstructionTransform - For the case where an instruction has a single +/// pair of tied register operands, attempt some transformations that may +/// either eliminate the tied operands or improve the opportunities for +/// coalescing away the register copy. Returns true if the tied operands +/// are eliminated altogether. +bool TwoAddressInstructionPass:: +TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + const TargetInstrDesc &TID = mi->getDesc(); + unsigned regA = mi->getOperand(DstIdx).getReg(); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + + // If regA is dead and the instruction can be deleted, just delete + // it so it doesn't clobber regB. + bool regBKilled = isKilled(*mi, regB, MRI, TII); + if (!regBKilled && mi->getOperand(DstIdx).isDead() && + DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { + ++NumDeletes; + return true; // Done with this instruction. + } + + // Check if it is profitable to commute the operands. + unsigned SrcOp1, SrcOp2; + unsigned regC = 0; + unsigned regCIdx = ~0U; + bool TryCommute = false; + bool AggressiveCommute = false; + if (TID.isCommutable() && mi->getNumOperands() >= 3 && + TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (SrcIdx == SrcOp1) + regCIdx = SrcOp2; + else if (SrcIdx == SrcOp2) + regCIdx = SrcOp1; + + if (regCIdx != ~0U) { + regC = mi->getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + // If C dies but B does not, swap the B and C operands. + // This makes the live ranges of A and C joinable. 
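+        // (Editorial illustration: given  A = op B, C<kill>,  commuting to
+        //  A = op C<kill>, B  frees the coalescer to join A with C, whose
+        //  value dies here anyway.)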
+ TryCommute = true; + else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + TryCommute = true; + AggressiveCommute = true; + } + } + } + + // If it's profitable to commute, try to do so. + if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return false; + } + + if (TID.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA)) { + // Try to convert it. + if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (TID.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); + if (UnfoldTID.getNumDefs() == 1) { + MachineFunction &MF = *mbbi->getParent(); + + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + const TargetRegisterClass *RC = + UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector<MachineInstr *, 2> NewMIs; + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + mbbi->insert(mi, NewMIs[0]); + mbbi->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. + unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformSuccess = + TryInstructionTransform(NewMI, mi, mbbi, + NewSrcIdx, NewDstIdx, Dist); + if (TransformSuccess || + NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. 
+ if (LV) { + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() != 0 && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + mi->eraseFromParent(); + mi = NewMIs[1]; + if (TransformSuccess) + return true; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + + return false; +} + +/// runOnMachineFunction - Reduce two-address instructions to two operands. +/// +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Machine Function\n"); + const TargetMachine &TM = MF.getTarget(); + MRI = &MF.getRegInfo(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + LV = getAnalysisIfAvailable<LiveVariables>(); + AA = &getAnalysis<AliasAnalysis>(); + + bool MadeChange = false; + + DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(dbgs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); + + // ReMatRegs - Keep track of the registers whose def's are remat'ed. + BitVector ReMatRegs; + ReMatRegs.resize(MRI->getLastVirtReg()+1); + + typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > + TiedOperandMap; + TiedOperandMap TiedOperands(4); + + SmallPtrSet<MachineInstr*, 8> Processed; + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + unsigned Dist = 0; + DistanceMap.clear(); + SrcRegMap.clear(); + DstRegMap.clear(); + Processed.clear(); + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me; ) { + MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } + + // Remember REG_SEQUENCE instructions, we'll deal with them later. + if (mi->isRegSequence()) + RegSequences.push_back(&*mi); + + const TargetInstrDesc &TID = mi->getDesc(); + bool FirstTied = true; + + DistanceMap.insert(std::make_pair(mi, ++Dist)); + + ProcessCopy(&*mi, &*mbbi, Processed); + + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. + unsigned NumOps = mi->isInlineAsm() + ? 
mi->getNumOperands() : TID.getNumOperands(); + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + + if (FirstTied) { + FirstTied = false; + ++NumTwoAddressInstrs; + DEBUG(dbgs() << '\t' << *mi); + } + + assert(mi->getOperand(SrcIdx).isReg() && + mi->getOperand(SrcIdx).getReg() && + mi->getOperand(SrcIdx).isUse() && + "two address instruction invalid"); + + unsigned regB = mi->getOperand(SrcIdx).getReg(); + TiedOperandMap::iterator OI = TiedOperands.find(regB); + if (OI == TiedOperands.end()) { + SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; + OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; + } + OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + } + + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + + // If the registers are already equal, nothing needs to be done. + if (mi->getOperand(SrcIdx).getReg() == + mi->getOperand(DstIdx).getReg()) + break; // Done with this instruction. + + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + break; // The tied operands have been eliminated. + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned regB = OI->first; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + unsigned regA = mi->getOperand(DstIdx).getReg(); + // Grab regB from the instruction because it may have changed if the + // instruction was commuted. + regB = mi->getOperand(SrcIdx).getReg(); + + if (regA == regB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = regA; + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != mi->getNumOperands(); ++i) + assert(i == DstIdx || + !mi->getOperand(i).isReg() || + mi->getOperand(i).getReg() != regA); +#endif + + // Emit a copy or rematerialize the definition. + const TargetRegisterClass *rc = MRI->getRegClass(regB); + MachineInstr *DefMI = MRI->getVRegDef(regB); + // If it's safe and profitable, remat the definition instead of + // copying it. 
+          if (DefMI &&
+              DefMI->getDesc().isAsCheapAsAMove() &&
+              DefMI->isSafeToReMat(TII, AA, regB) &&
+              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
+            DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
+            unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
+            TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
+            ReMatRegs.set(regB);
+            ++NumReMats;
+          } else {
+            BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
+                    regA).addReg(regB);
+          }
+
+          MachineBasicBlock::iterator prevMI = prior(mi);
+          // Update DistanceMap.
+          DistanceMap.insert(std::make_pair(prevMI, Dist));
+          DistanceMap[mi] = ++Dist;
+
+          DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
+
+          MachineOperand &MO = mi->getOperand(SrcIdx);
+          assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
+                 "inconsistent operand info for 2-reg pass");
+          if (MO.isKill()) {
+            MO.setIsKill(false);
+            RemovedKillFlag = true;
+          }
+          MO.setReg(regA);
+        }
+
+        if (AllUsesCopied) {
+          // Replace other (un-tied) uses of regB with LastCopiedReg.
+          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+            MachineOperand &MO = mi->getOperand(i);
+            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+              if (MO.isKill()) {
+                MO.setIsKill(false);
+                RemovedKillFlag = true;
+              }
+              MO.setReg(LastCopiedReg);
+            }
+          }
+
+          // Update live variables for regB.
+          if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
+            LV->addVirtualRegisterKilled(regB, prior(mi));
+
+        } else if (RemovedKillFlag) {
+          // Some tied uses of regB matched their destination registers, so
+          // regB is still used in this instruction, but a kill flag was
+          // removed from a different tied use of regB, so now we need to add
+          // a kill flag to one of the remaining uses of regB.
+          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+            MachineOperand &MO = mi->getOperand(i);
+            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+              MO.setIsKill(true);
+              break;
+            }
+          }
+        }
+
+        // Schedule the source copy / remat inserted to form the two-address
+        // instruction.  FIXME: Does it matter that the distance map may not be
+        // accurate after it's scheduled?
+        TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+
+        MadeChange = true;
+
+        DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+      }
+
+      // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+      if (mi->isInsertSubreg()) {
+        // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+        // To   %reg:subidx = COPY %subreg
+        unsigned SubIdx = mi->getOperand(3).getImm();
+        mi->RemoveOperand(3);
+        assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+        mi->getOperand(0).setSubReg(SubIdx);
+        mi->RemoveOperand(1);
+        mi->setDesc(TII->get(TargetOpcode::COPY));
+        DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+      }
+
+      // Clear TiedOperands here instead of at the top of the loop
+      // since most instructions do not have tied operands.
+      TiedOperands.clear();
+      mi = nmi;
+    }
+  }
+
+  // Some remat'ed instructions are dead.
+  int VReg = ReMatRegs.find_first();
+  while (VReg != -1) {
+    if (MRI->use_nodbg_empty(VReg)) {
+      MachineInstr *DefMI = MRI->getVRegDef(VReg);
+      DefMI->eraseFromParent();
+    }
+    VReg = ReMatRegs.find_next(VReg);
+  }
+
+  // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
+  // SSA form.  It's now safe to de-SSA.
+ MadeChange |= EliminateRegSequences(); + + return MadeChange; +} + +static void UpdateRegSequenceSrcs(unsigned SrcReg, + unsigned DstReg, unsigned SubIdx, + MachineRegisterInfo *MRI, + const TargetRegisterInfo &TRI) { + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), + RE = MRI->reg_end(); RI != RE; ) { + MachineOperand &MO = RI.getOperand(); + ++RI; + MO.substVirtReg(DstReg, SubIdx, TRI); + } +} + +/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are +/// EXTRACT_SUBREG from the same register and to the same virtual register +/// with different sub-register indices, attempt to combine the +/// EXTRACT_SUBREGs and pre-coalesce them. e.g. +/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0 +/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6 +/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5 +/// Since D subregs 5, 6 can combine to a Q register, we can coalesce +/// reg1026 to reg1029. +void +TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, + unsigned DstReg) { + SmallSet<unsigned, 4> Seen; + for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { + unsigned SrcReg = Srcs[i]; + if (!Seen.insert(SrcReg)) + continue; + + // Check that the instructions are all in the same basic block. + MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); + if (SrcDefMI->getParent() != DstDefMI->getParent()) + continue; + + // If there are no other uses than copies which feed into + // the reg_sequence, then we might be able to coalesce them. + bool CanCoalesce = true; + SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { + CanCoalesce = false; + break; + } + SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); + DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); + } + + if (!CanCoalesce || SrcSubIndices.size() < 2) + continue; + + // Check that the source subregisters can be combined. + std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); + unsigned NewSrcSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, + NewSrcSubIdx)) + continue; + + // Check that the destination subregisters can also be combined. + std::sort(DstSubIndices.begin(), DstSubIndices.end()); + unsigned NewDstSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, + NewDstSubIdx)) + continue; + + // If neither source nor destination can be combined to the full register, + // just give up. This could be improved if it ever matters. + if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) + continue; + + // Now that we know that all the uses are extract_subregs and that those + // subregs can somehow be combined, scan all the extract_subregs again to + // make sure the subregs are in the right order and can be composed. 
+ MachineInstr *SomeMI = 0; + CanCoalesce = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + assert(UseMI->isCopy()); + unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); + unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); + assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); + if ((NewDstSubIdx == 0 && + TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || + (NewSrcSubIdx == 0 && + TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { + CanCoalesce = false; + break; + } + // Keep track of one of the uses. + SomeMI = UseMI; + } + if (!CanCoalesce) + continue; + + // Insert a copy to replace the original. + MachineBasicBlock::iterator InsertLoc = SomeMI; + MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, + SomeMI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(SrcReg, 0, NewSrcSubIdx); + + // Remove all the old extract instructions. + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ) { + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI == CopyMI) + continue; + assert(UseMI->isCopy()); + // Move any kills to the new copy or extract instruction. + if (UseMI->getOperand(1).isKill()) { + CopyMI->getOperand(1).setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); + } + UseMI->eraseFromParent(); + } + } +} + +static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, + MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI != RegSeq && UseMI->isRegSequence()) + return true; + } + return false; +} + +/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// of the de-ssa process. This replaces sources of REG_SEQUENCE as +/// sub-register references of the register defined by REG_SEQUENCE. e.g. +/// +/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ... +/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6 +/// => +/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... +bool TwoAddressInstructionPass::EliminateRegSequences() { + if (RegSequences.empty()) + return false; + + for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) { + MachineInstr *MI = RegSequences[i]; + unsigned DstReg = MI->getOperand(0).getReg(); + if (MI->getOperand(0).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(DstReg) || + !(MI->getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + bool IsImpDef = true; + SmallVector<unsigned, 4> RealSrcs; + SmallSet<unsigned, 4> Seen; + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (MI->getOperand(i).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) { + DefMI->eraseFromParent(); + continue; + } + IsImpDef = false; + + // Remember COPY sources. These might be candidate for coalescing. 
+      if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
+        RealSrcs.push_back(DefMI->getOperand(1).getReg());
+
+      bool isKill = MI->getOperand(i).isKill();
+      if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+          !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+        // REG_SEQUENCE cannot have duplicated operands, add a copy.
+        // Also add a copy if the source is live-in to the block. We don't want
+        // to end up with a partial-redef of a livein, e.g.
+        // BB0:
+        // reg1051:10<def> =
+        // ...
+        // BB1:
+        // ... = reg1051:10
+        // BB2:
+        // reg1051:9<def> =
+        // LiveIntervalAnalysis won't like it.
+        //
+        // If the REG_SEQUENCE doesn't kill its source, keeping live variables
+        // correctly up to date becomes very difficult. Insert a copy.
+
+        // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+        // might insert a COPY that uses SrcReg after it was killed.
+        if (isKill)
+          for (unsigned j = i + 2; j < e; j += 2)
+            if (MI->getOperand(j).getReg() == SrcReg) {
+              MI->getOperand(j).setIsKill();
+              isKill = false;
+              break;
+            }
+
+        MachineBasicBlock::iterator InsertLoc = MI;
+        MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
+                                MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
+            .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+            .addReg(SrcReg, getKillRegState(isKill));
+        MI->getOperand(i).setReg(0);
+        if (LV && isKill)
+          LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+        DEBUG(dbgs() << "Inserted: " << *CopyMI);
+      }
+    }
+
+    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+      unsigned SrcReg = MI->getOperand(i).getReg();
+      if (!SrcReg) continue;
+      unsigned SubIdx = MI->getOperand(i+1).getImm();
+      UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
+    }
+
+    if (IsImpDef) {
+      DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+      MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+      for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+        MI->RemoveOperand(j);
+    } else {
+      DEBUG(dbgs() << "Eliminated: " << *MI);
+      MI->eraseFromParent();
+    }
+
+    // Try coalescing some EXTRACT_SUBREG instructions. This can create
+    // INSERT_SUBREG instructions that must have <undef> flags added by
+    // LiveIntervalAnalysis, so only run it when LiveVariables is available.
+    if (LV)
+      CoalesceExtSubRegs(RealSrcs, DstReg);
+  }
+
+  RegSequences.clear();
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..6dd3333
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,205 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass.  Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node.  To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack.  In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks.  Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
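+//
+// An editorial sketch (not part of the original file) of the marking step,
+// using the same df_ext_iterator idiom that runOnFunction uses below; the
+// traversal's only job is to populate the visited set:
+//
+//   SmallPtrSet<BasicBlock*, 8> Reachable;
+//   for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+//          df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+//        I != E; ++I)
+//     ;  // visiting a block inserts it into Reachable
+//   // any block not in Reachable is unreachable and can be deleted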
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +namespace { + class UnreachableBlockElim : public FunctionPass { + virtual bool runOnFunction(Function &F); + public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElim() : FunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<ProfileInfo>(); + } + }; +} +char UnreachableBlockElim::ID = 0; +INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", + "Remove unreachable blocks from the CFG", false, false); + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElim(); +} + +bool UnreachableBlockElim::runOnFunction(Function &F) { + SmallPtrSet<BasicBlock*, 8> Reachable; + + // Mark all reachable blocks. + for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I = + df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<BasicBlock*> DeadBlocks; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (!Reachable.count(I)) { + BasicBlock *BB = I; + DeadBlocks.push_back(BB); + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + // Actually remove the blocks now. 
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>(); + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + if (PI) PI->removeBlock(DeadBlocks[i]); + DeadBlocks[i]->eraseFromParent(); + } + + return DeadBlocks.size(); +} + + +namespace { + class UnreachableMachineBlockElim : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + MachineModuleInfo *MMI; + public: + static char ID; // Pass identification, replacement for typeid + UnreachableMachineBlockElim() : MachineFunctionPass(ID) {} + }; +} +char UnreachableMachineBlockElim::ID = 0; + +INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", + "Remove unreachable machine basic blocks", false, false); + +char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; + +void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + + // Mark all reachable blocks. + for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > + I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); + I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<MachineBasicBlock*> DeadBlocks; + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + + // Test for deadness. + if (!Reachable.count(BB)) { + DeadBlocks.push_back(BB); + + // Update dominator and loop info. + if (MLI) MLI->removeBlock(BB); + if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); + + while (BB->succ_begin() != BB->succ_end()) { + MachineBasicBlock* succ = *BB->succ_begin(); + + MachineBasicBlock::iterator start = succ->begin(); + while (start != succ->end() && start->isPHI()) { + for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) + if (start->getOperand(i).isMBB() && + start->getOperand(i).getMBB() == BB) { + start->RemoveOperand(i); + start->RemoveOperand(i-1); + } + + start++; + } + + BB->removeSuccessor(BB->succ_begin()); + } + } + } + + // Actually remove the blocks now. + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + DeadBlocks[i]->eraseFromParent(); + + // Cleanup PHI nodes. + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + // Prune unneeded PHI entries. 
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), + BB->pred_end()); + MachineBasicBlock::iterator phi = BB->begin(); + while (phi != BB->end() && phi->isPHI()) { + for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) + if (!preds.count(phi->getOperand(i).getMBB())) { + phi->RemoveOperand(i); + phi->RemoveOperand(i-1); + } + + if (phi->getNumOperands() == 3) { + unsigned Input = phi->getOperand(1).getReg(); + unsigned Output = phi->getOperand(0).getReg(); + + MachineInstr* temp = phi; + ++phi; + temp->eraseFromParent(); + + if (Input != Output) + F.getRegInfo().replaceRegWith(Output, Input); + + continue; + } + + ++phi; + } + } + + F.RenumberBlocks(); + + return DeadBlocks.size(); +} diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp new file mode 100644 index 0000000..20ffcff --- /dev/null +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -0,0 +1,283 @@ +//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the VirtRegMap class. +// +// It also contains implementations of the Spiller interface, which, given a +// virtual register map and a machine function, eliminates all virtual +// references by replacing them with physical register references - adding spill +// code as necessary. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "virtregmap" +#include "VirtRegMap.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumSpills , "Number of register spills"); + +//===----------------------------------------------------------------------===// +// VirtRegMap implementation +//===----------------------------------------------------------------------===// + +char VirtRegMap::ID = 0; + +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false); + +bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + MRI = &mf.getRegInfo(); + TII = mf.getTarget().getInstrInfo(); + TRI = mf.getTarget().getRegisterInfo(); + MF = &mf; + + ReMatId = MAX_STACK_SLOT+1; + LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; + + Virt2PhysMap.clear(); + Virt2StackSlotMap.clear(); + Virt2ReMatIdMap.clear(); + Virt2SplitMap.clear(); + Virt2SplitKillMap.clear(); + ReMatMap.clear(); + ImplicitDefed.clear(); + SpillSlotToUsesMap.clear(); + MI2VirtMap.clear(); + SpillPt2VirtMap.clear(); + RestorePt2VirtMap.clear(); + EmergencySpillMap.clear(); + EmergencySpillSlots.clear(); + + SpillSlotToUsesMap.resize(8); + 
ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1- + TargetRegisterInfo::FirstVirtualRegister); + + allocatableRCRegs.clear(); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + allocatableRCRegs.insert(std::make_pair(*I, + TRI->getAllocatableSet(mf, *I))); + + grow(); + + return false; +} + +void VirtRegMap::grow() { + unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + Virt2PhysMap.grow(LastVirtReg); + Virt2StackSlotMap.grow(LastVirtReg); + Virt2ReMatIdMap.grow(LastVirtReg); + Virt2SplitMap.grow(LastVirtReg); + Virt2SplitKillMap.grow(LastVirtReg); + ReMatMap.grow(LastVirtReg); + ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1); +} + +unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg); + unsigned physReg = Hint.second; + if (physReg && + TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + physReg = getPhys(physReg); + if (Hint.first == 0) + return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg)) + ? physReg : 0; + return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); +} + +int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + unsigned Idx = SS-LowSpillSlot; + while (Idx >= SpillSlotToUsesMap.size()) + SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); + Virt2StackSlotMap[virtReg] = SS; + ++NumSpills; + return SS; +} + +void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + assert((SS >= 0 || + (SS >= MF->getFrameInfo()->getObjectIndexBegin())) && + "illegal fixed frame index"); + Virt2StackSlotMap[virtReg] = SS; +} + +int VirtRegMap::assignVirtReMatId(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + Virt2ReMatIdMap[virtReg] = ReMatId; + return ReMatId++; +} + +void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + Virt2ReMatIdMap[virtReg] = id; +} + +int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { + std::map<const TargetRegisterClass*, int>::iterator I = + EmergencySpillSlots.find(RC); + if (I != EmergencySpillSlots.end()) + return I->second; + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + EmergencySpillSlots[RC] = SS; + return SS; +} + +void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { + if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { + // If FI < 
LowSpillSlot, this stack reference was produced by + // instruction selection and is not a spill + if (FI >= LowSpillSlot) { + assert(FI >= 0 && "Spill slot index should not be negative!"); + assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() + && "Invalid spill slot"); + SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); + } + } +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, + MachineInstr *NewMI, ModRef MRInfo) { + // Move previous memory references folded to new instruction. + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); + for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), + E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { + MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); + MI2VirtMap.erase(I++); + } + + // add new memory reference + MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); + MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int FI = MO.getIndex(); + if (MF->getFrameInfo()->isFixedObjectIndex(FI)) + continue; + // This stack reference was produced by instruction selection and + // is not a spill + if (FI < LowSpillSlot) + continue; + assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() + && "Invalid spill slot"); + SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); + } + MI2VirtMap.erase(MI); + SpillPt2VirtMap.erase(MI); + RestorePt2VirtMap.erase(MI); + EmergencySpillMap.erase(MI); +} + +/// FindUnusedRegisters - Gather a list of allocatable registers that +/// have not been allocated to any virtual register. 
+bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { + unsigned NumRegs = TRI->getNumRegs(); + UnusedRegs.reset(); + UnusedRegs.resize(NumRegs); + + BitVector Used(NumRegs); + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + Used.set(Virt2PhysMap[i]); + + BitVector Allocatable = TRI->getAllocatableSet(*MF); + bool AnyUnused = false; + for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { + if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { + bool ReallyUnused = true; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + if (Used[*AS] || LIs->hasInterval(*AS)) { + ReallyUnused = false; + break; + } + } + if (ReallyUnused) { + AnyUnused = true; + UnusedRegs.set(Reg); + } + } + } + + return AnyUnused; +} + +void VirtRegMap::print(raw_ostream &OS, const Module* M) const { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) + << "] " << MRI.getRegClass(i)->getName() << "\n"; + } + + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) + OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] + << "] " << MRI.getRegClass(i)->getName() << "\n"; + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h new file mode 100644 index 0000000..8b6082d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h @@ -0,0 +1,500 @@ +//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a virtual register map. This maps virtual registers to +// physical registers and virtual registers to stack slots. It is created and +// updated by a register allocator and then used by a machine code rewriter that +// adds spill code and rewrites virtual into physical register references. 
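Aside: the core data structure this header declares is a dense, growable array indexed by virtual register number, with a sentinel meaning "no physical register assigned yet". A toy model of that idea in plain C++ (not the IndexedMap type itself):

#include <cassert>
#include <vector>

class Virt2PhysSketch {
  static const unsigned NO_PHYS_REG = 0; // sentinel, as in VirtRegMap
  std::vector<unsigned> Map;             // index: virtual register number
public:
  void grow(unsigned LastVirtReg) {
    if (LastVirtReg >= Map.size())
      Map.resize(LastVirtReg + 1, NO_PHYS_REG);
  }
  bool hasPhys(unsigned VReg) const { return Map[VReg] != NO_PHYS_REG; }
  void assign(unsigned VReg, unsigned PReg) {
    assert(Map[VReg] == NO_PHYS_REG && "already mapped");
    Map[VReg] = PReg;
  }
  unsigned getPhys(unsigned VReg) const { return Map[VReg]; }
};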
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+
+namespace llvm {
+  class LiveIntervals;
+  class MachineInstr;
+  class MachineFunction;
+  class MachineRegisterInfo;
+  class TargetInstrInfo;
+  class TargetRegisterInfo;
+  class raw_ostream;
+
+  class VirtRegMap : public MachineFunctionPass {
+  public:
+    enum {
+      NO_PHYS_REG = 0,
+      NO_STACK_SLOT = (1L << 30)-1,
+      MAX_STACK_SLOT = (1L << 18)-1
+    };
+
+    enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+    typedef std::multimap<MachineInstr*,
+                          std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+  private:
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineFunction *MF;
+
+    DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+    /// Virt2PhysMap - This is a virtual to physical register
+    /// mapping. Each virtual register is required to have an entry in
+    /// it; even spilled virtual registers (the register mapped to a
+    /// spilled register is the temporary used to load it from the
+    /// stack).
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+    /// Virt2StackSlotMap - This is virtual register to stack slot
+    /// mapping. Each spilled virtual register has an entry in it
+    /// which corresponds to the stack slot this register is spilled
+    /// at.
+    IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+    /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+    /// mapping. Each spilled virtual register that should be remat'd has an
+    /// entry in it which corresponds to the remat id.
+    IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+    /// Virt2SplitMap - This is a virtual register to split virtual register
+    /// mapping.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+    /// Virt2SplitKillMap - This is a split virtual register to its last use
+    /// (kill) index mapping.
+    IndexedMap<SlotIndex> Virt2SplitKillMap;
+
+    /// ReMatMap - This is virtual register to re-materialized instruction
+    /// mapping. Each virtual register whose definition is going to be
+    /// re-materialized has an entry in it.
+    IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+    /// MI2VirtMap - This is MachineInstr to virtual register
+    /// mapping. In the case of memory spill code being folded into
+    /// instructions, we need to know which virtual register was
+    /// read/written by this instruction.
+    MI2VirtMapTy MI2VirtMap;
+
+    /// SpillPt2VirtMap - This records the virtual registers which should
+    /// be spilled right after the MachineInstr due to live interval
+    /// splitting.
+    std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+      SpillPt2VirtMap;
+
+    /// RestorePt2VirtMap - This records the virtual registers which should
+    /// be restored right before the MachineInstr due to live interval
+    /// splitting.
+    std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+    /// EmergencySpillMap - This records the physical registers that should
+    /// be spilled / restored around the MachineInstr since the register
+    /// allocator has run out of registers.
+    std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+    /// EmergencySpillSlots - This records emergency spill slots used to
+    /// spill physical registers when the register allocator runs out of
+    /// registers. Ideally only one stack slot is used per function per
+    /// register class.
+    std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+    /// ReMatId - Instead of assigning a stack slot to a virtual register
+    /// that is to be rematerialized, a unique id is assigned. This keeps
+    /// track of the highest id used so far. Note, this starts at (1<<18) to
+    /// avoid conflicts with stack slot numbers.
+    int ReMatId;
+
+    /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+    int LowSpillSlot, HighSpillSlot;
+
+    /// SpillSlotToUsesMap - Records uses for each register spill slot.
+    SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
+
+    /// ImplicitDefed - One bit for each virtual register. If set it indicates
+    /// the register is implicitly defined.
+    BitVector ImplicitDefed;
+
+    /// UnusedRegs - A list of physical registers that have not been used.
+    BitVector UnusedRegs;
+
+    VirtRegMap(const VirtRegMap&);     // DO NOT IMPLEMENT
+    void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+  public:
+    static char ID;
+    VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
+                   Virt2StackSlotMap(NO_STACK_SLOT),
+                   Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+                   Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
+                   ReMatId(MAX_STACK_SLOT+1),
+                   LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    MachineFunction &getMachineFunction() const {
+      assert(MF && "getMachineFunction called before runOnMachineFunction");
+      return *MF;
+    }
+
+    void grow();
+
+    /// @brief returns true if the specified virtual register is
+    /// mapped to a physical register
+    bool hasPhys(unsigned virtReg) const {
+      return getPhys(virtReg) != NO_PHYS_REG;
+    }
+
+    /// @brief returns the physical register mapped to the specified
+    /// virtual register
+    unsigned getPhys(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2PhysMap[virtReg];
+    }
+
+    /// @brief creates a mapping for the specified virtual register to
+    /// the specified physical register
+    void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+             TargetRegisterInfo::isPhysicalRegister(physReg));
+      assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+             "attempt to assign physical register to already mapped "
+             "virtual register");
+      Virt2PhysMap[virtReg] = physReg;
+    }
+
+    /// @brief clears the specified virtual register's physical
+    /// register mapping
+    void clearVirt(unsigned virtReg) {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+             "attempt to clear an unassigned virtual register");
+      Virt2PhysMap[virtReg] = NO_PHYS_REG;
+    }
+
+    /// @brief clears all virtual to physical register mappings
+    void clearAllVirt() {
+      Virt2PhysMap.clear();
+      grow();
+    }
+
+    /// @brief returns the register allocation preference.
+    unsigned getRegAllocPref(unsigned virtReg);
+
+    /// @brief records virtReg is a split live interval from SReg.
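Aside: the ReMatId numbering above relies on stack slots and remat ids sharing one integer space, with remat ids handed out from MAX_STACK_SLOT+1 upward so a single "slot or remat" value can be classified by comparison. A small sketch of the trick, with a local constant mirroring the enum (illustrative names, not the class's members):

#include <cassert>

const int MAX_STACK_SLOT_SK = (1 << 18) - 1; // mirrors MAX_STACK_SLOT above

struct ReMatIdAllocator {
  int NextId = MAX_STACK_SLOT_SK + 1; // first id above the stack slot range
  int allocate() { return NextId++; }
};

static bool isReMat(int SlotOrReMat) { return SlotOrReMat > MAX_STACK_SLOT_SK; }
static int reMatIndex(int SlotOrReMat) {
  assert(isReMat(SlotOrReMat));
  return SlotOrReMat - MAX_STACK_SLOT_SK - 1; // the "RM#" shown in debug output
}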
+    void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+      Virt2SplitMap[virtReg] = SReg;
+    }
+
+    /// @brief returns the live interval virtReg is split from.
+    unsigned getPreSplitReg(unsigned virtReg) {
+      return Virt2SplitMap[virtReg];
+    }
+
+    /// @brief returns true if the specified virtual register is not
+    /// mapped to a stack slot or rematerialized.
+    bool isAssignedReg(unsigned virtReg) const {
+      if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+          getReMatId(virtReg) == NO_STACK_SLOT)
+        return true;
+      // Split register can be assigned a physical register as well as a
+      // stack slot or remat id.
+      return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+    }
+
+    /// @brief returns the stack slot mapped to the specified virtual
+    /// register
+    int getStackSlot(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2StackSlotMap[virtReg];
+    }
+
+    /// @brief returns the rematerialization id mapped to the specified virtual
+    /// register
+    int getReMatId(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2ReMatIdMap[virtReg];
+    }
+
+    /// @brief create a mapping for the specified virtual register to
+    /// the next available stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+    /// @brief create a mapping for the specified virtual register to
+    /// the specified stack slot
+    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    int assignVirtReMatId(unsigned virtReg);
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    void assignVirtReMatId(unsigned virtReg, int id);
+
+    /// @brief returns true if the specified virtual register is being
+    /// re-materialized.
+    bool isReMaterialized(unsigned virtReg) const {
+      return ReMatMap[virtReg] != NULL;
+    }
+
+    /// @brief returns the original machine instruction being re-issued
+    /// to re-materialize the specified virtual register.
+    MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+      return ReMatMap[virtReg];
+    }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose.
+    void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+      ReMatMap[virtReg] = def;
+    }
+
+    /// @brief record the last use (kill) of a split virtual register.
+    void addKillPoint(unsigned virtReg, SlotIndex index) {
+      Virt2SplitKillMap[virtReg] = index;
+    }
+
+    SlotIndex getKillPoint(unsigned virtReg) const {
+      return Virt2SplitKillMap[virtReg];
+    }
+
+    /// @brief remove the last use (kill) of a split virtual register.
+    void removeKillPoint(unsigned virtReg) {
+      Virt2SplitKillMap[virtReg] = SlotIndex();
+    }
+
+    /// @brief returns true if the specified MachineInstr is a spill point.
+    bool isSpillPt(MachineInstr *Pt) const {
+      return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+    }
+
+    /// @brief returns the virtual registers that should be spilled due to
+    /// splitting right after the specified MachineInstr.
+    std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+      return SpillPt2VirtMap[Pt];
+    }
+
+    /// @brief records the specified MachineInstr as a spill point for virtReg.
+    void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+      std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+        I = SpillPt2VirtMap.find(Pt);
+      if (I != SpillPt2VirtMap.end())
+        I->second.push_back(std::make_pair(virtReg, isKill));
+      else {
+        std::vector<std::pair<unsigned,bool> > Virts;
+        Virts.push_back(std::make_pair(virtReg, isKill));
+        SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+      }
+    }
+
+    /// @brief - transfer spill point information from one instruction to
+    /// another.
+    void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+        I = SpillPt2VirtMap.find(Old);
+      if (I == SpillPt2VirtMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back().first;
+        bool isKill = I->second.back().second;
+        I->second.pop_back();
+        addSpillPoint(virtReg, isKill, New);
+      }
+      SpillPt2VirtMap.erase(I);
+    }
+
+    /// @brief returns true if the specified MachineInstr is a restore point.
+    bool isRestorePt(MachineInstr *Pt) const {
+      return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+    }
+
+    /// @brief returns the virtual registers that should be restored due to
+    /// splitting right after the specified MachineInstr.
+    std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+      return RestorePt2VirtMap[Pt];
+    }
+
+    /// @brief records the specified MachineInstr as a restore point for virtReg.
+    void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        RestorePt2VirtMap.find(Pt);
+      if (I != RestorePt2VirtMap.end())
+        I->second.push_back(virtReg);
+      else {
+        std::vector<unsigned> Virts;
+        Virts.push_back(virtReg);
+        RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+      }
+    }
+
+    /// @brief - transfer restore point information from one instruction to
+    /// another.
+    void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        RestorePt2VirtMap.find(Old);
+      if (I == RestorePt2VirtMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back();
+        I->second.pop_back();
+        addRestorePoint(virtReg, New);
+      }
+      RestorePt2VirtMap.erase(I);
+    }
+
+    /// @brief records that the specified physical register must be spilled
+    /// around the specified machine instr.
+    void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+      if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+        EmergencySpillMap[MI].push_back(PhysReg);
+      else {
+        std::vector<unsigned> PhysRegs;
+        PhysRegs.push_back(PhysReg);
+        EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+      }
+    }
+
+    /// @brief returns true if one or more physical registers must be spilled
+    /// around the specified instruction.
+    bool hasEmergencySpills(MachineInstr *MI) const {
+      return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+    }
+
+    /// @brief returns the physical registers to be spilled and restored around
+    /// the instruction.
+    std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+      return EmergencySpillMap[MI];
+    }
+
+    /// @brief - transfer emergency spill information from one instruction to
+    /// another.
+    void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+        EmergencySpillMap.find(Old);
+      if (I == EmergencySpillMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back();
+        I->second.pop_back();
+        addEmergencySpill(virtReg, New);
+      }
+      EmergencySpillMap.erase(I);
+    }
+
+    /// @brief return or get an emergency spill slot for the register class.
+    int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+    /// @brief Return lowest spill slot index.
+    int getLowSpillSlot() const {
+      return LowSpillSlot;
+    }
+
+    /// @brief Return highest spill slot index.
+    int getHighSpillSlot() const {
+      return HighSpillSlot;
+    }
+
+    /// @brief Records a spill slot use.
+    void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+    /// @brief Returns true if spill slot has been used.
+    bool isSpillSlotUsed(int FrameIndex) const {
+      assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+      return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+    }
+
+    /// @brief Mark the specified register as being implicitly defined.
+    void setIsImplicitlyDefined(unsigned VirtReg) {
+      ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+    }
+
+    /// @brief Returns true if the virtual register is implicitly defined.
+    bool isImplicitlyDefined(unsigned VirtReg) const {
+      return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into newMI machine instruction.
+    void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+                    ModRef MRInfo);
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into the specified machine instruction.
+    void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+    /// @brief returns the virtual registers' values folded in memory
+    /// operands of this instruction
+    std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+    getFoldedVirts(MachineInstr* MI) const {
+      return MI2VirtMap.equal_range(MI);
+    }
+
+    /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
+    /// folded instruction map and spill point map.
+    void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+    /// FindUnusedRegisters - Gather a list of allocatable registers that
+    /// have not been allocated to any virtual register.
+    bool FindUnusedRegisters(LiveIntervals* LIs);
+
+    /// HasUnusedRegisters - Return true if there are any allocatable registers
+    /// that have not been allocated to any virtual register.
+    bool HasUnusedRegisters() const {
+      return !UnusedRegs.none();
+    }
+
+    /// setRegisterUsed - Remember the physical register is now used.
+    void setRegisterUsed(unsigned Reg) {
+      UnusedRegs.reset(Reg);
+    }
+
+    /// isRegisterUnused - Return true if the physical register has not been
+    /// used.
+    bool isRegisterUnused(unsigned Reg) const {
+      return UnusedRegs[Reg];
+    }
+
+    /// getFirstUnusedRegister - Return the first physical register that has not
+    /// been used.
+    unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+      int Reg = UnusedRegs.find_first();
+      while (Reg != -1) {
+        if (allocatableRCRegs[RC][Reg])
+          return (unsigned)Reg;
+        Reg = UnusedRegs.find_next(Reg);
+      }
+      return 0;
+    }
+
+    void print(raw_ostream &OS, const Module* M = 0) const;
+    void dump() const;
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 0000000..240d28c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2552 @@
+//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats  , "Number of re-materializations");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+  enum RewriterName { local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+            cl::desc("Rewriter to use (default=local)"),
+            cl::Prefix,
+            cl::values(clEnumVal(local,   "local rewriter"),
+                       clEnumVal(trivial, "trivial rewriter"),
+                       clEnumValEnd),
+            cl::init(local));
+
+static cl::opt<bool>
+ScheduleSpills("schedule-spills",
+               cl::desc("Schedule spill code"),
+               cl::init(false));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
+/// Note that operands may be added, so the MO reference is no longer valid.
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+                              const TargetRegisterInfo &TRI) {
+  if (MO.getSubReg()) {
+    MO.substPhysReg(Reg, TRI);
+
+    // Any kill flags apply to the full virtual register, so they also apply to
+    // the full physical register.
+    // We assume that partial defs have already been decorated with a super-reg
+    // <imp-def> operand by LiveIntervals.
+    MachineInstr &MI = *MO.getParent();
+    if (MO.isUse() && !MO.isUndef() &&
+        (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+      MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+  } else {
+    MO.setReg(Reg);
+  }
+}
+
+namespace {
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct TrivialRewriter : public VirtRegRewriter {
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+    DEBUG(dbgs() << "********** Function: "
+          << MF.getFunction()->getName() << '\n');
+    DEBUG(dbgs() << "**** Machine Instrs"
+          << "(NOTE! Does not include spills and reloads!) ****\n");
+    DEBUG(MF.dump());
+
+    MachineRegisterInfo *mri = &MF.getRegInfo();
+    const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
+
+    bool changed = false;
+
+    for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+         liItr != liEnd; ++liItr) {
+
+      const LiveInterval *li = liItr->second;
+      unsigned reg = li->reg;
+
+      if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+        if (!li->empty())
+          mri->setPhysRegUsed(reg);
+      }
+      else {
+        if (!VRM.hasPhys(reg))
+          continue;
+        unsigned pReg = VRM.getPhys(reg);
+        mri->setPhysRegUsed(pReg);
+        // Copy the register use-list before traversing it.
+        SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
+        for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
+             E = mri->reg_end(); I != E; ++I)
+          reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
+        for (unsigned N=0; N != reglist.size(); ++N)
+          substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
+                            pReg, *tri);
+        changed |= !reglist.empty();
+      }
+    }
+
+    DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+    DEBUG(MF.dump());
+
+    return changed;
+  }
+
+};
+
+}
+
+// ************************************************************************ //
+
+namespace {
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remat are available in
+/// each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+/// this bit and addAvailable sets it if necessary.
+class AvailableSpills {
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+
+  // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+  // or remat'ed virtual register values that are still available, due to
+  // being loaded or stored to, but not invalidated yet.
+  std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+  // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+  // indicating which stack slot values are currently held by a physreg. This
+  // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+  // physreg is modified.
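Aside: the entries of the SpillSlotsOrReMatsAvailable map pack two facts into one unsigned, exactly as the class comment describes: the holding physreg in the upper bits and the CanClobber flag in the low bit. A minimal sketch of that encoding (free functions for illustration; the class does this inline):

static unsigned encodeAvail(unsigned PhysReg, bool CanClobber) {
  return (PhysReg << 1) | (unsigned)CanClobber;
}
static unsigned availPhysReg(unsigned Entry) { return Entry >> 1; }
static bool availCanClobber(unsigned Entry) { return Entry & 1; }
// disallowClobberPhysRegOnly() later clears the flag with Entry &= ~1,
// leaving the physreg part intact.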
+  std::multimap<unsigned, int> PhysRegsAvailable;
+
+  void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+  void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+  AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+    : TRI(tri), TII(tii) {
+  }
+
+  /// clear - Reset the state.
+  void clear() {
+    SpillSlotsOrReMatsAvailable.clear();
+    PhysRegsAvailable.clear();
+  }
+
+  const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+  /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+  /// available in a physical register, return that PhysReg, otherwise
+  /// return 0.
+  unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+    std::map<int, unsigned>::const_iterator I =
+      SpillSlotsOrReMatsAvailable.find(Slot);
+    if (I != SpillSlotsOrReMatsAvailable.end()) {
+      return I->second >> 1;  // Remove the CanClobber bit.
+    }
+    return 0;
+  }
+
+  /// addAvailable - Mark that the specified stack slot / remat is available
+  /// in the specified physreg. If CanClobber is true, the physreg can be
+  /// modified at any time without changing the semantics of the program.
+  void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+    // If this stack slot is thought to be available in some other physreg,
+    // remove its record.
+    ModifyStackSlotOrReMat(SlotOrReMat);
+
+    PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+    SpillSlotsOrReMatsAvailable[SlotOrReMat] = (Reg << 1) |
+                                               (unsigned)CanClobber;
+
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DEBUG(dbgs() << "Remembering RM#"
+            << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
+    else
+      DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+  }
+
+  /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+  /// the value of the specified stackslot register if it desires. The
+  /// specified stack slot must be available in a physreg for this query to
+  /// make sense.
+  bool canClobberPhysRegForSS(int SlotOrReMat) const {
+    assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+           "Value not available!");
+    return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+  }
+
+  /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+  /// physical register where values for some stack slot(s) might be
+  /// available.
+  bool canClobberPhysReg(unsigned PhysReg) const {
+    std::multimap<unsigned, int>::const_iterator I =
+      PhysRegsAvailable.lower_bound(PhysReg);
+    while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+      int SlotOrReMat = I->second;
+      I++;
+      if (!canClobberPhysRegForSS(SlotOrReMat))
+        return false;
+    }
+    return true;
+  }
+
+  /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+  /// stackslot register. The register is still available but is no longer
+  /// allowed to be modified.
+  void disallowClobberPhysReg(unsigned PhysReg);
+
+  /// ClobberPhysReg - This is called when the specified physreg changes
+  /// value. We use this to invalidate any info about stuff that lives in
+  /// it and any of its aliases.
+  void ClobberPhysReg(unsigned PhysReg);
+
+  /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+  /// slot changes. This removes information about which register the
+  /// previous value for this slot lives in (as the previous value is dead
+  /// now).
+  void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+  /// AddAvailableRegsToLiveIn - Availability information is being kept coming
+  /// into the specified MBB. Add available physical registers as potential
+  /// live-in's. If they are reused in the MBB, they will be added to the
+  /// live-in set to make the register scavenger and post-allocation scheduler
+  /// happy.
+  void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+                                std::vector<MachineOperand*> &KillOps);
+};
+
+}
+
+// ************************************************************************ //
+
+// Given a location where a reload of a spilled register or a remat of
+// a constant is to be inserted, attempt to find a safe location to
+// insert the load at an earlier point in the basic-block, to hide
+// latency of the load and to avoid address-generation interlock
+// issues.
+static MachineBasicBlock::iterator
+ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
+                 MachineBasicBlock::iterator const Begin,
+                 unsigned PhysReg,
+                 const TargetRegisterInfo *TRI,
+                 bool DoReMat,
+                 int SSorRMId,
+                 const TargetInstrInfo *TII,
+                 const MachineFunction &MF)
+{
+  if (!ScheduleSpills)
+    return InsertLoc;
+
+  // Spill backscheduling is of primary interest to addresses, so
+  // don't do anything if the register isn't in the register class
+  // used for pointers.
+
+  const TargetLowering *TL = MF.getTarget().getTargetLowering();
+
+  if (!TL->isTypeLegal(TL->getPointerTy()))
+    // Believe it or not, this is true on PIC16.
+    return InsertLoc;
+
+  const TargetRegisterClass *ptrRegClass =
+    TL->getRegClassFor(TL->getPointerTy());
+  if (!ptrRegClass->contains(PhysReg))
+    return InsertLoc;
+
+  // Scan upwards through the preceding instructions. If an instruction doesn't
+  // reference the stack slot or the register we're loading, we can
+  // backschedule the reload up past it.
+  MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
+  while (NewInsertLoc != Begin) {
+    MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
+    for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
+      MachineOperand &Op = Prev->getOperand(i);
+      if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
+        goto stop;
+    }
+    if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
+        Prev->findRegisterDefOperand(PhysReg))
+      goto stop;
+    for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
+      if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
+          Prev->findRegisterDefOperand(*Alias))
+        goto stop;
+    NewInsertLoc = Prev;
+  }
+stop:;
+
+  // If we made it to the beginning of the block, turn around and move back
+  // down just past any existing reloads. They're likely to be reloads/remats
+  // for instructions earlier than what our current reload/remat is for, so
+  // they should be scheduled earlier.
+  if (NewInsertLoc == Begin) {
+    int FrameIdx;
+    while (InsertLoc != NewInsertLoc &&
+           (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
+            TII->isTriviallyReMaterializable(NewInsertLoc)))
+      ++NewInsertLoc;
+  }
+
+  return NewInsertLoc;
+}
+
+namespace {
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+  // The MachineInstr operand that reused an available value.
+  unsigned Operand;
+
+  // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+  unsigned StackSlotOrReMat;
+
+  // PhysRegReused - The physical register the value was available in.
+  unsigned PhysRegReused;
+
+  // AssignedPhysReg - The physreg that was assigned for use by the reload.
+  unsigned AssignedPhysReg;
+
+  // VirtReg - The virtual register itself.
+ unsigned VirtReg; + + ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr, + unsigned vreg) + : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr), + AssignedPhysReg(apr), VirtReg(vreg) {} +}; + +/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that +/// is reused instead of reloaded. +class ReuseInfo { + MachineInstr &MI; + std::vector<ReusedOp> Reuses; + BitVector PhysRegsClobbered; +public: + ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) { + PhysRegsClobbered.resize(tri->getNumRegs()); + } + + bool hasReuses() const { + return !Reuses.empty(); + } + + /// addReuse - If we choose to reuse a virtual register that is already + /// available instead of reloading it, remember that we did so. + void addReuse(unsigned OpNo, unsigned StackSlotOrReMat, + unsigned PhysRegReused, unsigned AssignedPhysReg, + unsigned VirtReg) { + // If the reload is to the assigned register anyway, no undo will be + // required. + if (PhysRegReused == AssignedPhysReg) return; + + // Otherwise, remember this. + Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, + AssignedPhysReg, VirtReg)); + } + + void markClobbered(unsigned PhysReg) { + PhysRegsClobbered.set(PhysReg); + } + + bool isClobbered(unsigned PhysReg) const { + return PhysRegsClobbered.test(PhysReg); + } + + /// GetRegForReload - We are about to emit a reload into PhysReg. If there + /// is some other operand that is using the specified register, either pick + /// a new register to use, or evict the previous reload and use this reg. + unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, + MachineFunction &MF, MachineInstr *MI, + AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + SmallSet<unsigned, 8> &Rejected, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + VirtRegMap &VRM); + + /// GetRegForReload - Helper for the above GetRegForReload(). Add a + /// 'Rejected' set to remember which registers have been considered and + /// rejected for the reload. This avoids infinite looping in case like + /// this: + /// t1 := op t2, t3 + /// t2 <- assigned r0 for use by the reload but ended up reuse r1 + /// t3 <- assigned r1 for use by the reload but ended up reuse r0 + /// t1 <- desires r1 + /// sees r1 is taken by t2, tries t2's reload register r0 + /// sees r0 is taken by t3, tries t3's reload register r1 + /// sees r1 is taken by t2, tries t2's reload register r0 ... + unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, + AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + VirtRegMap &VRM) { + SmallSet<unsigned, 8> Rejected; + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); + return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + } +}; + +} + +// ****************** // +// Utility Functions // +// ****************** // + +/// findSinglePredSuccessor - Return via reference a vector of machine basic +/// blocks each of which is a successor of the specified BB and has no other +/// predecessor. 
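Aside, before the findSinglePredSuccessor helper below: the "Rejected set" technique documented above for GetRegForReload can be distilled into a few self-contained lines. This is a simplified model, not the rewriter's code; ReusedBy is an illustrative stand-in mapping each physreg to the reload register some operand already claimed for it. Remembering every register already tried guarantees the recursion terminates instead of cycling r0 -> r1 -> r0.

#include <map>
#include <set>

static unsigned pickReloadReg(unsigned PhysReg,
                              const std::map<unsigned, unsigned> &ReusedBy,
                              std::set<unsigned> &Rejected) {
  std::map<unsigned, unsigned>::const_iterator I = ReusedBy.find(PhysReg);
  // Unclaimed, or its alternative was already rejected: settle for PhysReg.
  if (I == ReusedBy.end() || Rejected.count(I->second))
    return PhysReg;
  Rejected.insert(PhysReg);        // never consider PhysReg again
  return pickReloadReg(I->second, ReusedBy, Rejected);
}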
+static void findSinglePredSuccessor(MachineBasicBlock *MBB, + SmallVectorImpl<MachineBasicBlock *> &Succs){ + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->pred_size() == 1) + Succs.push_back(SuccMBB); + } +} + +/// InvalidateKill - Invalidate register kill information for a specific +/// register. This also unsets the kills marker on the last kill operand. +static void InvalidateKill(unsigned Reg, + const TargetRegisterInfo* TRI, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps) { + if (RegKills[Reg]) { + KillOps[Reg]->setIsKill(false); + // KillOps[Reg] might be a def of a super-register. + unsigned KReg = KillOps[Reg]->getReg(); + KillOps[KReg] = NULL; + RegKills.reset(KReg); + for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { + if (RegKills[*SR]) { + KillOps[*SR]->setIsKill(false); + KillOps[*SR] = NULL; + RegKills.reset(*SR); + } + } + } +} + +/// InvalidateKills - MI is going to be deleted. If any of its operands are +/// marked kill, then invalidate the information. +static void InvalidateKills(MachineInstr &MI, + const TargetRegisterInfo* TRI, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + SmallVector<unsigned, 2> *KillRegs = NULL) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (KillRegs) + KillRegs->push_back(Reg); + assert(Reg < KillOps.size()); + if (KillOps[Reg] == &MO) { + KillOps[Reg] = NULL; + RegKills.reset(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + if (RegKills[*SR]) { + KillOps[*SR] = NULL; + RegKills.reset(*SR); + } + } + } + } +} + +/// InvalidateRegDef - If the def operand of the specified def MI is now dead +/// (since its spill instruction is removed), mark it isDead. Also checks if +/// the def MI has other definition operands that are not dead. Returns it by +/// reference. +static bool InvalidateRegDef(MachineBasicBlock::iterator I, + MachineInstr &NewDef, unsigned Reg, + bool &HasLiveDef, + const TargetRegisterInfo *TRI) { + // Due to remat, it's possible this reg isn't being reused. That is, + // the def of this reg (by prev MI) is now dead. + MachineInstr *DefMI = I; + MachineOperand *DefOp = NULL; + for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = DefMI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) + continue; + if (MO.getReg() == Reg) + DefOp = &MO; + else if (!MO.isDead()) + HasLiveDef = true; + } + if (!DefOp) + return false; + + bool FoundUse = false, Done = false; + MachineBasicBlock::iterator E = &NewDef; + ++I; ++E; + for (; !Done && I != E; ++I) { + MachineInstr *NMI = I; + for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = NMI->getOperand(j); + if (!MO.isReg() || MO.getReg() == 0 || + (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) + continue; + if (MO.isUse()) + FoundUse = true; + Done = true; // Stop after scanning all the operands of this MI. + } + } + if (!FoundUse) { + // Def is dead! + DefOp->setIsDead(); + return true; + } + return false; +} + +/// UpdateKills - Track and update kill info. If a MI reads a register that is +/// marked kill, then it must be due to register reuse. 
Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+                        BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps) {
+  // These do not affect kill info at all.
+  if (MI.isDebugValue())
+    return;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+      // That can't be right. Register is killed but not re-defined and it's
+      // being reused. Let's fix that.
+      KillOps[Reg]->setIsKill(false);
+      // KillOps[Reg] might be a def of a super-register.
+      unsigned KReg = KillOps[Reg]->getReg();
+      KillOps[KReg] = NULL;
+      RegKills.reset(KReg);
+
+      // Must be a def of a super-register. Its other sub-registers are no
+      // longer killed as well.
+      for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+        KillOps[*SR] = NULL;
+        RegKills.reset(*SR);
+      }
+    } else {
+      // Check for subreg kills as well.
+      // d4 =
+      // store d4, fi#0
+      // ...
+      //    = s8<kill>
+      // ...
+      //    = d4  <avoiding reload>
+      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+        unsigned SReg = *SR;
+        if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
+          KillOps[SReg]->setIsKill(false);
+          unsigned KReg = KillOps[SReg]->getReg();
+          KillOps[KReg] = NULL;
+          RegKills.reset(KReg);
+
+          for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
+            KillOps[*SSR] = NULL;
+            RegKills.reset(*SSR);
+          }
+        }
+      }
+    }
+
+    if (MO.isKill()) {
+      RegKills.set(Reg);
+      KillOps[Reg] = &MO;
+      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+        RegKills.set(*SR);
+        KillOps[*SR] = &MO;
+      }
+    }
+  }
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    RegKills.reset(Reg);
+    KillOps[Reg] = NULL;
+    // It also defines (or partially defines) aliases.
+    for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+      RegKills.reset(*SR);
+      KillOps[*SR] = NULL;
+    }
+    for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+      RegKills.reset(*SR);
+      KillOps[*SR] = NULL;
+    }
+  }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator &MII,
+                          unsigned DestReg, unsigned Reg,
+                          const TargetInstrInfo *TII,
+                          const TargetRegisterInfo *TRI,
+                          VirtRegMap &VRM) {
+  MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
+#ifndef NDEBUG
+  const TargetInstrDesc &TID = ReMatDefMI->getDesc();
+  assert(TID.getNumDefs() == 1 &&
+         "Don't know how to remat instructions that define > 1 values!");
+#endif
+  TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
+  MachineInstr *NewMI = prior(MII);
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    unsigned VirtReg = MO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+      continue;
+    assert(MO.isUse());
+    unsigned Phys = VRM.getPhys(VirtReg);
+    assert(Phys && "Virtual register is not assigned a register?");
+    substitutePhysReg(MO, Phys, *TRI);
+  }
+  ++NumReMats;
+}
+
+/// findSuperReg - Find the SubReg's super-register of the given register class
+/// where its SubIdx sub-register is SubReg.
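Aside, before the findSuperReg helper below: the RegKills/KillOps pair used throughout UpdateKills and the Invalidate* routines boils down to one bit per physical register ("currently marked killed") plus a pointer to the operand carrying that kill flag, so the flag can be cleared when the register turns out to live longer. A stripped-down model with illustrative types, not LLVM's:

#include <vector>

struct OperandSketch { bool IsKill; };

struct KillState {
  std::vector<bool> RegKills;          // indexed by physreg number
  std::vector<OperandSketch*> KillOps; // last kill operand per physreg
  explicit KillState(unsigned NumRegs)
      : RegKills(NumRegs, false), KillOps(NumRegs, nullptr) {}

  void recordKill(unsigned Reg, OperandSketch *MO) {
    RegKills[Reg] = true;
    KillOps[Reg] = MO;
  }
  // Called when Reg is read again after its supposed kill: un-kill it.
  void invalidateKill(unsigned Reg) {
    if (!RegKills[Reg]) return;
    KillOps[Reg]->IsKill = false;
    KillOps[Reg] = nullptr;
    RegKills[Reg] = false;
  }
};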
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+                             unsigned SubIdx, const TargetRegisterInfo *TRI) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I) {
+    unsigned Reg = *I;
+    if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+      return Reg;
+  }
+  return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation  //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int SlotOrReMat = I->second;
+    I++;
+    assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+    DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+          << " copied, it is available for use but can no longer be modified\n");
+  }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+    disallowClobberPhysRegOnly(*AS);
+  disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int SlotOrReMat = I->second;
+    PhysRegsAvailable.erase(I++);
+    assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+    DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+          << " clobbered, invalidating ");
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+    else
+      DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
+  }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+    ClobberPhysRegOnly(*AS);
+  ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is being kept coming
+/// into the specified MBB. Add available physical registers as potential
+/// live-in's. If they are reused in the MBB, they will be added to the
+/// live-in set to make the register scavenger and post-allocation scheduler
+/// happy.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  std::set<unsigned> NotAvailable;
+  for (std::multimap<unsigned, int>::iterator
+         I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+       I != E; ++I) {
+    unsigned Reg = I->first;
+    const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
+    // FIXME: A temporary workaround.
We can't reuse available value if it's + // not safe to move the def of the virtual register's class. e.g. + // X86::RFP* register classes. Do not add it as a live-in. + if (!TII->isSafeToMoveRegClassDefs(RC)) + // This is no longer available. + NotAvailable.insert(Reg); + else { + MBB.addLiveIn(Reg); + InvalidateKill(Reg, TRI, RegKills, KillOps); + } + + // Skip over the same register. + std::multimap<unsigned, int>::iterator NI = llvm::next(I); + while (NI != E && NI->first == Reg) { + ++I; + ++NI; + } + } + + for (std::set<unsigned>::iterator I = NotAvailable.begin(), + E = NotAvailable.end(); I != E; ++I) { + ClobberPhysReg(*I); + for (const unsigned *SubRegs = TRI->getSubRegisters(*I); + *SubRegs; ++SubRegs) + ClobberPhysReg(*SubRegs); + } +} + +/// ModifyStackSlotOrReMat - This method is called when the value in a stack +/// slot changes. This removes information about which register the previous +/// value for this slot lives in (as the previous value is dead now). +void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { + std::map<int, unsigned>::iterator It = + SpillSlotsOrReMatsAvailable.find(SlotOrReMat); + if (It == SpillSlotsOrReMatsAvailable.end()) return; + unsigned Reg = It->second >> 1; + SpillSlotsOrReMatsAvailable.erase(It); + + // This register may hold the value of multiple stack slots, only remove this + // stack slot from the set of values the register contains. + std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); + for (; ; ++I) { + assert(I != PhysRegsAvailable.end() && I->first == Reg && + "Map inverse broken!"); + if (I->second == SlotOrReMat) break; + } + PhysRegsAvailable.erase(I); +} + +// ************************** // +// Reuse Info Implementation // +// ************************** // + +/// GetRegForReload - We are about to emit a reload into PhysReg. If there +/// is some other operand that is using the specified register, either pick +/// a new register to use, or evict the previous reload and use this reg. +unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, + unsigned PhysReg, + MachineFunction &MF, + MachineInstr *MI, AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + SmallSet<unsigned, 8> &Rejected, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + VirtRegMap &VRM) { + const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Spills.getRegInfo(); + + if (Reuses.empty()) return PhysReg; // This is most often empty. + + for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { + ReusedOp &Op = Reuses[ro]; + // If we find some other reuse that was supposed to use this register + // exactly for its reload, we can change this reload to use ITS reload + // register. That is, unless its reload register has already been + // considered and subsequently rejected because it has also been reused + // by another operand. + if (Op.PhysRegReused == PhysReg && + Rejected.count(Op.AssignedPhysReg) == 0 && + RC->contains(Op.AssignedPhysReg)) { + // Yup, use the reload register that we didn't use before. + unsigned NewReg = Op.AssignedPhysReg; + Rejected.insert(PhysReg); + return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + } else { + // Otherwise, we might also have a problem if a previously reused + // value aliases the new register. If so, codegen the previous reload + // and use this one. 
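+      // Hypothetical example: this reload wants R1 while an earlier operand
+      // reused R0, and R0 aliases R1; the earlier reuse must be turned into
+      // a real reload before R1 can be handed out safely.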
+ unsigned PRRU = Op.PhysRegReused; + if (TRI->regsOverlap(PRRU, PhysReg)) { + // Okay, we found out that an alias of a reused register + // was used. This isn't good because it means we have + // to undo a previous reuse. + MachineBasicBlock *MBB = MI->getParent(); + const TargetRegisterClass *AliasRC = + MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); + + // Copy Op out of the vector and remove it, we're going to insert an + // explicit load for it. + ReusedOp NewOp = Op; + Reuses.erase(Reuses.begin()+ro); + + // MI may be using only a sub-register of PhysRegUsed. + unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); + unsigned SubIdx = 0; + assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && + "A reuse cannot be a virtual register"); + if (PRRU != RealPhysRegUsed) { + // What was the sub-register index? + SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); + assert(SubIdx && + "Operand physreg is not a sub-register of PhysRegUsed"); + } + + // Ok, we're going to try to reload the assigned physreg into the + // slot that we were supposed to in the first place. However, that + // register could hold a reuse. Check to see if it conflicts or + // would prefer us to use a different register. + unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, + MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + + bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; + int SSorRMId = DoReMat + ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, + DoReMat, SSorRMId, TII, MF); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, + TRI, VRM); + } else { + TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, + NewOp.StackSlotOrReMat, AliasRC, TRI); + MachineInstr *LoadMI = prior(InsertLoc); + VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); + // Any stores to this stack slot are not dead anymore. + MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; + ++NumLoads; + } + Spills.ClobberPhysReg(NewPhysReg); + Spills.ClobberPhysReg(NewOp.PhysRegReused); + + unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg; + MI->getOperand(NewOp.Operand).setReg(RReg); + MI->getOperand(NewOp.Operand).setSubReg(0); + + Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); + + DEBUG(dbgs() << "Reuse undone!\n"); + --NumReused; + + // Finally, PhysReg is now available, go ahead and use it. + return PhysReg; + } + } + } + return PhysReg; +} + +// ************************************************************************ // + +/// FoldsStackSlotModRef - Return true if the specified MI folds the specified +/// stack slot mod/ref. It also checks if it's possible to unfold the +/// instruction by having it define a specified physical register instead. 
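+/// In other words (a summary of the checks below, not extra behavior): MI
+/// must fold a mod/ref of SS, be unfoldable, and not already use any
+/// register that overlaps the proposed scratch register PhysReg.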
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+                                 const TargetInstrInfo *TII,
+                                 const TargetRegisterInfo *TRI,
+                                 VirtRegMap &VRM) {
+  if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+    return false;
+
+  bool Found = false;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+    unsigned VirtReg = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    if (MR & VirtRegMap::isModRef)
+      if (VRM.getStackSlot(VirtReg) == SS) {
+        Found = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+        break;
+      }
+  }
+  if (!Found)
+    return false;
+
+  // Does the instruction use a register that overlaps the scratch register?
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    unsigned Reg = MO.getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (!VRM.hasPhys(Reg))
+        continue;
+      Reg = VRM.getPhys(Reg);
+    }
+    if (TRI->regsOverlap(PhysReg, Reg))
+      return false;
+  }
+  return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by
+/// looking at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+                                 MachineBasicBlock &MBB,
+                                 const TargetRegisterClass *RC,
+                                 const TargetRegisterInfo *TRI,
+                                 BitVector &AllocatableRegs) {
+  BitVector Defs(TRI->getNumRegs());
+  BitVector Uses(TRI->getNumRegs());
+  SmallVector<unsigned, 4> LocalUses;
+  SmallVector<unsigned, 4> Kills;
+
+  // Take a look at 2 instructions at most.
+  unsigned Count = 0;
+  while (Count < 2) {
+    if (MII == MBB.begin())
+      break;
+    MachineInstr *PrevMI = prior(MII);
+    MII = PrevMI;
+
+    if (PrevMI->isDebugValue())
+      continue; // Skip over dbg_value instructions.
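+    // PrevMI is a real instruction; it consumes one slot of the
+    // two-instruction lookback window.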
+ ++Count; + + for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = PrevMI->getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef()) { + Defs.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + Defs.set(*AS); + } else { + LocalUses.push_back(Reg); + if (MO.isKill() && AllocatableRegs[Reg]) + Kills.push_back(Reg); + } + } + + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned Kill = Kills[i]; + if (!Defs[Kill] && !Uses[Kill] && + RC->contains(Kill)) + return Kill; + } + for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { + unsigned Reg = LocalUses[i]; + Uses.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + Uses.set(*AS); + } + } + + return 0; +} + +static +void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &TRI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == VirtReg) + substitutePhysReg(MO, PhysReg, TRI); + } +} + +namespace { + +struct RefSorter { + bool operator()(const std::pair<MachineInstr*, int> &A, + const std::pair<MachineInstr*, int> &B) { + return A.second < B.second; + } +}; + +// ***************************** // +// Local Spiller Implementation // +// ***************************** // + +class LocalRewriter : public VirtRegRewriter { + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + VirtRegMap *VRM; + BitVector AllocatableRegs; + DenseMap<MachineInstr*, unsigned> DistanceMap; + DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; + + MachineBasicBlock *MBB; // Basic block currently being processed. 
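+  // Note: blocks are rewritten depth-first so that spill availability in
+  // MBB can flow into successors that have MBB as their only predecessor
+  // (see runOnMachineFunction below).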
+
+public:
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs);
+
+private:
+
+  bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+                         MachineBasicBlock::iterator &MII,
+                         std::vector<MachineInstr*> &MaybeDeadStores,
+                         AvailableSpills &Spills,
+                         BitVector &RegKills,
+                         std::vector<MachineOperand*> &KillOps);
+
+  bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                        std::vector<MachineInstr*> &MaybeDeadStores,
+                        AvailableSpills &Spills,
+                        BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
+
+  bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+                           unsigned VirtReg, unsigned SrcReg, int SS,
+                           AvailableSpills &Spills,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps,
+                           const TargetRegisterInfo *TRI);
+
+  void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                           int Idx, unsigned PhysReg, int StackSlot,
+                           const TargetRegisterClass *RC,
+                           bool isAvailable, MachineInstr *&LastStore,
+                           AvailableSpills &Spills,
+                           SmallSet<MachineInstr*, 4> &ReMatDefs,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps);
+
+  void TransferDeadness(unsigned Reg, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
+
+  bool InsertEmergencySpills(MachineInstr *MI);
+
+  bool InsertRestores(MachineInstr *MI,
+                      AvailableSpills &Spills,
+                      BitVector &RegKills,
+                      std::vector<MachineOperand*> &KillOps);
+
+  bool InsertSpills(MachineInstr *MI);
+
+  void RewriteMBB(LiveIntervals *LIs,
+                  AvailableSpills &Spills, BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps);
+};
+}
+
+bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
+                                         LiveIntervals* LIs) {
+  MRI = &MF.getRegInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getTarget().getInstrInfo();
+  VRM = &vrm;
+  AllocatableRegs = TRI->getAllocatableSet(MF);
+  DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+        << MF.getFunction()->getName() << "':\n");
+  DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+        " reloads!) ****\n");
+  DEBUG(MF.dump());
+
+  // Spills - Keep track of which spilled values are available in physregs
+  // so that we can choose to reuse the physregs instead of emitting
+  // reloads. This is usually refreshed per basic block.
+  AvailableSpills Spills(TRI, TII);
+
+  // Keep track of kill information.
+  BitVector RegKills(TRI->getNumRegs());
+  std::vector<MachineOperand*> KillOps;
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  // SinglePredSuccs - Successor blocks which have a single predecessor.
+  SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+  SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+  // Traverse the basic blocks depth first.
+  MachineBasicBlock *Entry = MF.begin();
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+  for (df_ext_iterator<MachineBasicBlock*,
+         SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MBB = *DFI;
+    if (!EarlyVisited.count(MBB))
+      RewriteMBB(LIs, Spills, RegKills, KillOps);
+
+    // If this MBB is the only predecessor of a successor, keep the
+    // availability information and visit it next.
+    do {
+      // Keep visiting single-predecessor successors as long as possible.
+      SinglePredSuccs.clear();
+      findSinglePredSuccessor(MBB, SinglePredSuccs);
+      if (SinglePredSuccs.empty())
+        MBB = 0;
+      else {
+        // FIXME: There may be more than one successor, each of which has
+        // MBB as its only predecessor.
+        MBB = SinglePredSuccs[0];
+        if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+          Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+          RewriteMBB(LIs, Spills, RegKills, KillOps);
+        }
+      }
+    } while (MBB);
+
+    // Clear the availability info.
+    Spills.clear();
+  }
+
+  DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+  DEBUG(MF.dump());
+
+  // Mark unused spill slots.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  int SS = VRM->getLowSpillSlot();
+  if (SS != VirtRegMap::NO_STACK_SLOT) {
+    for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
+      SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
+      if (!VRM->isSpillSlotUsed(SS)) {
+        MFI->RemoveStackObject(SS);
+        for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
+          MachineInstr *DVMI = DbgValues[j];
+          MachineBasicBlock *DVMBB = DVMI->getParent();
+          DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
+          VRM->RemoveMachineInstrFromMaps(DVMI);
+          DVMBB->erase(DVMI);
+        }
+        ++NumDSS;
+      }
+      DbgValues.clear();
+    }
+  }
+  Slot2DbgValues.clear();
+
+  return true;
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+///     xorq  %r12<kill>, %r13
+///     addq  %rax, -184(%rbp)
+///     addq  %r13, -184(%rbp)
+/// ==>
+///     xorq  %r12<kill>, %r13
+///     movq  -184(%rbp), %r12
+///     addq  %rax, %r12
+///     addq  %r13, %r12
+///     movq  %r12, -184(%rbp)
+bool LocalRewriter::
+OptimizeByUnfold2(unsigned VirtReg, int SS,
+                  MachineBasicBlock::iterator &MII,
+                  std::vector<MachineInstr*> &MaybeDeadStores,
+                  AvailableSpills &Spills,
+                  BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator NextMII = llvm::next(MII);
+  // Skip over dbg_value instructions.
+  while (NextMII != MBB->end() && NextMII->isDebugValue())
+    NextMII = llvm::next(NextMII);
+  if (NextMII == MBB->end())
+    return false;
+
+  if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+    return false;
+
+  // Now let's see if the last couple of instructions happen to have freed up
+  // a register.
+  const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+  unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
+  if (!PhysReg)
+    return false;
+
+  MachineFunction &MF = *MBB->getParent();
+  TRI = MF.getTarget().getRegisterInfo();
+  MachineInstr &MI = *MII;
+  if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // If the next instruction also folds the same SS modref and can be unfolded,
+  // then it's worthwhile to issue a load from SS into the free register and
+  // then unfold these instructions.
+  if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // Back-schedule reloads and remats.
+  ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
+
+  // Load from SS to the spare physical register.
+  TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
+  // This invalidates PhysReg.
+  Spills.ClobberPhysReg(PhysReg);
+  // Remember it's available.
+  Spills.addAvailable(SS, PhysReg);
+  MaybeDeadStores[SS] = NULL;
+
+  // Unfold current MI.
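+  // e.g. (hypothetical): once -184(%rbp) lives in the scratch register,
+  // 'addq %rax, -184(%rbp)' unfolds to 'addq %rax, %r12'.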
+  SmallVector<MachineInstr*, 4> NewMIs;
+  if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+    llvm_unreachable("Unable to unfold the load / store folding instruction!");
+  assert(NewMIs.size() == 1);
+  AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+  VRM->transferRestorePts(&MI, NewMIs[0]);
+  MII = MBB->insert(MII, NewMIs[0]);
+  InvalidateKills(MI, TRI, RegKills, KillOps);
+  VRM->RemoveMachineInstrFromMaps(&MI);
+  MBB->erase(&MI);
+  ++NumModRefUnfold;
+
+  // Unfold next instructions that fold the same SS.
+  do {
+    MachineInstr &NextMI = *NextMII;
+    NextMII = llvm::next(NextMII);
+    NewMIs.clear();
+    if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+      llvm_unreachable("Unable to unfold the load / store folding instruction!");
+    assert(NewMIs.size() == 1);
+    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+    VRM->transferRestorePts(&NextMI, NewMIs[0]);
+    MBB->insert(NextMII, NewMIs[0]);
+    InvalidateKills(NextMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(&NextMI);
+    MBB->erase(&NextMI);
+    ++NumModRefUnfold;
+    // Skip over dbg_value instructions.
+    while (NextMII != MBB->end() && NextMII->isDebugValue())
+      NextMII = llvm::next(NextMII);
+    if (NextMII == MBB->end())
+      break;
+  } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
+
+  // Store the value back into SS.
+  TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
+  MachineInstr *StoreMI = prior(NextMII);
+  VRM->addSpillSlotUse(SS, StoreMI);
+  VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+  return true;
+}
+
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+///     xorl  %edi, %eax
+///     movl  %eax, -32(%ebp)
+///     movl  -36(%ebp), %eax
+///     orl   %eax, -32(%ebp)
+/// ==>
+///     xorl  %edi, %eax
+///     orl   -36(%ebp), %eax
+///     mov   %eax, -32(%ebp)
+/// This enables unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalRewriter::
+OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                 std::vector<MachineInstr*> &MaybeDeadStores,
+                 AvailableSpills &Spills,
+                 BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  MachineFunction &MF = *MBB->getParent();
+  MachineInstr &MI = *MII;
+  unsigned UnfoldedOpc = 0;
+  unsigned UnfoldPR = 0;
+  unsigned UnfoldVR = 0;
+  int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+    // Only transform a MI that folds a single register.
+    if (UnfoldedOpc)
+      return false;
+    UnfoldVR = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    // MI2VirtMap can be updated, which invalidates the iterator.
+    // Increment the iterator first.
+    ++I;
+    if (VRM->isAssignedReg(UnfoldVR))
+      continue;
+    // If this reference is not a use, any previous store is now dead.
+    // Otherwise, the store to this stack slot is not dead anymore.
+    FoldedSS = VRM->getStackSlot(UnfoldVR);
+    MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+    if (DeadStore && (MR & VirtRegMap::isModRef)) {
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+      if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+        continue;
+      UnfoldPR = PhysReg;
+      UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+                                                    false, true);
+    }
+  }
+
+  if (!UnfoldedOpc) {
+    if (!UnfoldVR)
+      return false;
+
+    // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills, + RegKills, KillOps); + } + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) + continue; + unsigned VirtReg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) + continue; + if (VRM->isAssignedReg(VirtReg)) { + unsigned PhysReg = VRM->getPhys(VirtReg); + if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + } else if (VRM->isReMaterialized(VirtReg)) + continue; + int SS = VRM->getStackSlot(VirtReg); + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + if (PhysReg) { + if (TRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + continue; + } + if (VRM->hasPhys(VirtReg)) { + PhysReg = VRM->getPhys(VirtReg); + if (!TRI->regsOverlap(PhysReg, UnfoldPR)) + continue; + } + + // Ok, we'll need to reload the value into a register which makes + // it impossible to perform the store unfolding optimization later. + // Let's see if it is possible to fold the load if the store is + // unfolded. This allows us to perform the store unfolding + // optimization. + SmallVector<MachineInstr*, 4> NewMIs; + if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { + assert(NewMIs.size() == 1); + MachineInstr *NewMI = NewMIs.back(); + MBB->insert(MII, NewMI); + NewMIs.clear(); + int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); + assert(Idx != -1); + SmallVector<unsigned, 1> Ops; + Ops.push_back(Idx); + MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); + NewMI->eraseFromParent(); + if (FoldedMI) { + VRM->addSpillSlotUse(SS, FoldedMI); + if (!VRM->hasPhys(UnfoldVR)) + VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); + VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); + MII = FoldedMI; + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + return true; + } + } + } + + return false; +} + +/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and +/// where SrcReg is r1 and it is tied to r0. Return true if after +/// commuting this instruction it will be r0 = op r2, r1. 
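+/// e.g. (illustrative): r0 = ADD r1, r2 with r1 tied to r0 commutes to
+/// r0 = ADD r2, r1, so DstIdx is set to 2, the operand that now pairs with
+/// the reload.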
+static bool CommuteChangesDestination(MachineInstr *DefMI, + const TargetInstrDesc &TID, + unsigned SrcReg, + const TargetInstrInfo *TII, + unsigned &DstIdx) { + if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + return false; + if (!DefMI->getOperand(1).isReg() || + DefMI->getOperand(1).getReg() != SrcReg) + return false; + unsigned DefIdx; + if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) + return false; + unsigned SrcIdx1, SrcIdx2; + if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) + return false; + if (SrcIdx1 == 1 && SrcIdx2 == 2) { + DstIdx = 2; + return true; + } + return false; +} + +/// CommuteToFoldReload - +/// Look for +/// r1 = load fi#1 +/// r1 = op r1, r2<kill> +/// store r1, fi#1 +/// +/// If op is commutable and r2 is killed, then we can xform these to +/// r2 = op r2, fi#1 +/// store r2, fi#1 +bool LocalRewriter:: +CommuteToFoldReload(MachineBasicBlock::iterator &MII, + unsigned VirtReg, unsigned SrcReg, int SS, + AvailableSpills &Spills, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + const TargetRegisterInfo *TRI) { + if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) + return false; + + MachineInstr &MI = *MII; + MachineBasicBlock::iterator DefMII = prior(MII); + MachineInstr *DefMI = DefMII; + const TargetInstrDesc &TID = DefMI->getDesc(); + unsigned NewDstIdx; + if (DefMII != MBB->begin() && + TID.isCommutable() && + CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) + return false; + MachineInstr *ReloadMI = prior(DefMII); + int FrameIdx; + unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); + if (DestReg != SrcReg || FrameIdx != SS) + return false; + int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); + if (UseIdx == -1) + return false; + unsigned DefIdx; + if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) + return false; + assert(DefMI->getOperand(DefIdx).isReg() && + DefMI->getOperand(DefIdx).getReg() == SrcReg); + + // Now commute def instruction. + MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); + if (!CommutedMI) + return false; + MBB->insert(MII, CommutedMI); + SmallVector<unsigned, 1> Ops; + Ops.push_back(NewDstIdx); + MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); + // Not needed since foldMemoryOperand returns new MI. + CommutedMI->eraseFromParent(); + if (!FoldedMI) + return false; + + VRM->addSpillSlotUse(SS, FoldedMI); + VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); + // Insert new def MI and spill MI. + const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); + TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); + MII = prior(MII); + MachineInstr *StoreMI = MII; + VRM->addSpillSlotUse(SS, StoreMI); + VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); + MII = FoldedMI; // Update MII to backtrack. + + // Delete all 3 old instructions. + InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(ReloadMI); + MBB->erase(ReloadMI); + InvalidateKills(*DefMI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(DefMI); + MBB->erase(DefMI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + + // If NewReg was previously holding value of some SS, it's now clobbered. + // This has to be done now because it's a physical register. 
When this
+      // instruction is re-visited, it's ignored.
+    Spills.ClobberPhysReg(NewReg);
+
+    ++NumCommutes;
+    return true;
+  }
+
+  return false;
+}
+
+/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+/// the last store to the same slot is now dead. If so, remove the last store.
+void LocalRewriter::
+SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                    int Idx, unsigned PhysReg, int StackSlot,
+                    const TargetRegisterClass *RC,
+                    bool isAvailable, MachineInstr *&LastStore,
+                    AvailableSpills &Spills,
+                    SmallSet<MachineInstr*, 4> &ReMatDefs,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+  TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
+                           TRI);
+  MachineInstr *StoreMI = prior(oldNextMII);
+  VRM->addSpillSlotUse(StackSlot, StoreMI);
+  DEBUG(dbgs() << "Store:\t" << *StoreMI);
+
+  // If there is a dead store to this stack slot, nuke it now.
+  if (LastStore) {
+    DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+    ++NumDSE;
+    SmallVector<unsigned, 2> KillRegs;
+    InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+    MachineBasicBlock::iterator PrevMII = LastStore;
+    bool CheckDef = PrevMII != MBB->begin();
+    if (CheckDef)
+      --PrevMII;
+    VRM->RemoveMachineInstrFromMaps(LastStore);
+    MBB->erase(LastStore);
+    if (CheckDef) {
+      // Look at defs of killed registers on the store. Mark the defs
+      // as dead since the store has been deleted and they aren't
+      // being reused.
+      for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+        bool HasOtherDef = false;
+        if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+          MachineInstr *DeadDef = PrevMII;
+          if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+            // FIXME: This assumes a remat def does not have side effects.
+            VRM->RemoveMachineInstrFromMaps(DeadDef);
+            MBB->erase(DeadDef);
+            ++NumDRM;
+          }
+        }
+      }
+    }
+  }
+
+  // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
+  // the last of multiple instructions is the actual store.
+  LastStore = prior(oldNextMII);
+
+  // If the stack slot value was previously available in some other
+  // register, change it now. Otherwise, make the register available
+  // in PhysReg.
+  Spills.ModifyStackSlotOrReMat(StackSlot);
+  Spills.ClobberPhysReg(PhysReg);
+  Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+  ++NumStores;
+}
+
+/// isSafeToDelete - Return true if this instruction doesn't produce any side
+/// effect and all of its defs are dead.
+static bool isSafeToDelete(MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+      TID.isBarrier() || TID.isReturn() ||
+      TID.hasUnmodeledSideEffects())
+    return false;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.isKill())
+      // FIXME: We can't remove kill markers or else the scavenger will assert.
+      // An alternative is to add an ADD pseudo instruction to replace kill
+      // markers.
+      return false;
+  }
+  return true;
+}
+
+/// TransferDeadness - An identity copy definition is dead and it's being
+/// removed. Find the last def or use and mark it as dead / kill.
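+/// e.g. (illustrative): after deleting 'R0 = COPY R0', the nearest earlier
+/// use of R0 in this block becomes a kill; a side-effect-free earlier def
+/// may instead be deleted and the search continues backward.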
+void LocalRewriter::
+TransferDeadness(unsigned Reg, BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  SmallPtrSet<MachineInstr*, 4> Seens;
+  SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (Seens.insert(UDMI))
+      Refs.push_back(std::make_pair(UDMI, DI->second));
+  }
+
+  if (Refs.empty())
+    return;
+  std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+  while (!Refs.empty()) {
+    MachineInstr *LastUDMI = Refs.back().first;
+    Refs.pop_back();
+
+    MachineOperand *LastUD = NULL;
+    for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = LastUDMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() != Reg)
+        continue;
+      if (!LastUD || (LastUD->isUse() && MO.isDef()))
+        LastUD = &MO;
+      if (LastUDMI->isRegTiedToDefOperand(i))
+        break;
+    }
+    if (LastUD->isDef()) {
+      // If the instruction has no side effect, delete it and propagate
+      // backward further. Otherwise, mark it dead and we are done.
+      if (!isSafeToDelete(*LastUDMI)) {
+        LastUD->setIsDead();
+        break;
+      }
+      VRM->RemoveMachineInstrFromMaps(LastUDMI);
+      MBB->erase(LastUDMI);
+    } else {
+      LastUD->setIsKill();
+      RegKills.set(Reg);
+      KillOps[Reg] = LastUD;
+      break;
+    }
+  }
+}
+
+/// InsertEmergencySpills - Insert emergency spills before MI if requested by
+/// VRM. Return true if spills were inserted.
+bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
+  if (!VRM->hasEmergencySpills(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  SmallSet<int, 4> UsedSS;
+  std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
+  for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+    unsigned PhysReg = EmSpills[i];
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+    assert(RC && "Unable to determine register class!");
+    int SS = VRM->getEmergencySpillSlot(RC);
+    if (UsedSS.count(SS))
+      llvm_unreachable("Need to spill more than one physical register!");
+    UsedSS.insert(SS);
+    TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
+    MachineInstr *StoreMI = prior(MII);
+    VRM->addSpillSlotUse(SS, StoreMI);
+
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
+                       TII, *MBB->getParent());
+
+    TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
+
+    MachineInstr *LoadMI = prior(InsertLoc);
+    VRM->addSpillSlotUse(SS, LoadMI);
+    ++NumPSpills;
+    DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+  }
+  return true;
+}
+
+/// InsertRestores - Restore registers before MI as requested by VRM. Return
+/// true if any instructions were inserted.
+bool LocalRewriter::InsertRestores(MachineInstr *MI,
+                                   AvailableSpills &Spills,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  if (!VRM->isRestorePt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
+  for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+    unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    unsigned Phys = VRM->getPhys(VirtReg);
+    MRI->setPhysRegUsed(Phys);
+
+    // Check if the value being restored is available. If so, it must be
+    // from a predecessor BB that falls through into this BB. We do not
+    // expect:
+    // BB1:
+    // r1 = load fi#1
+    // ...
+    //    = r1<kill>
+    // ... # r1 not clobbered
+    // ...
+    //    = load fi#1
+    bool DoReMat = VRM->isReMaterialized(VirtReg);
+    int SSorRMId = DoReMat
+      ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+    unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+    if (InReg == Phys) {
+      // If the value is already available in the expected register, save
+      // a reload / remat.
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+              << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+            << TRI->getName(InReg) << " for vreg"
+            << VirtReg << " instead of reloading into physreg "
+            << TRI->getName(Phys) << '\n');
+      ++NumOmitted;
+      continue;
+    } else if (InReg && InReg != Phys) {
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+              << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+            << TRI->getName(InReg) << " for vreg"
+            << VirtReg << " by copying it into physreg "
+            << TRI->getName(Phys) << '\n');
+
+      // If the reloaded / remat value is available in another register,
+      // copy it to the desired register.
+
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                         *MBB->getParent());
+      MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
+                                     TII->get(TargetOpcode::COPY), Phys)
+                               .addReg(InReg, RegState::Kill);
+
+      // This invalidates Phys.
+      Spills.ClobberPhysReg(Phys);
+      // Remember it's available.
+      Spills.addAvailable(SSorRMId, Phys);
+
+      CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+      UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+      DEBUG(dbgs() << '\t' << *CopyMI);
+      ++NumCopified;
+      continue;
+    }
+
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                       *MBB->getParent());
+
+    if (VRM->isReMaterialized(VirtReg)) {
+      ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
+    } else {
+      const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+      TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
+      MachineInstr *LoadMI = prior(InsertLoc);
+      VRM->addSpillSlotUse(SSorRMId, LoadMI);
+      ++NumLoads;
+      DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+    }
+
+    // This invalidates Phys.
+    Spills.ClobberPhysReg(Phys);
+    // Remember it's available.
+    Spills.addAvailable(SSorRMId, Phys);
+
+    UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+    DEBUG(dbgs() << '\t' << *prior(MII));
+  }
+  return true;
+}
+
+/// InsertSpills - Insert spills after MI if requested by VRM. Return true
+/// if spills were inserted.
+bool LocalRewriter::InsertSpills(MachineInstr *MI) {
+  if (!VRM->isSpillPt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<std::pair<unsigned,bool> > &SpillRegs =
+    VRM->getSpillPtSpills(MI);
+  for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+    unsigned VirtReg = SpillRegs[i].first;
+    bool isKill = SpillRegs[i].second;
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    unsigned Phys = VRM->getPhys(VirtReg);
+    int StackSlot = VRM->getStackSlot(VirtReg);
+    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+    TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
+                             RC, TRI);
+    MachineInstr *StoreMI = prior(oldNextMII);
+    VRM->addSpillSlotUse(StackSlot, StoreMI);
+    DEBUG(dbgs() << "Store:\t" << *StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+  }
+  return true;
+}
+
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them. If possible, avoid reloading vregs.
+void
+LocalRewriter::RewriteMBB(LiveIntervals *LIs,
+                          AvailableSpills &Spills, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps) {
+
+  DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+        << MBB->getName() << "':\n");
+
+  MachineFunction &MF = *MBB->getParent();
+
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store. If the stack slot value is never read
+  // (because the value was used from some available register, for example),
+  // and subsequently stored to, the original store is dead. This map keeps
+  // track of inserted stores that are not used. If we see a subsequent store
+  // to the same stack slot, the original store is deleted.
+  std::vector<MachineInstr*> MaybeDeadStores;
+  MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+  // ReMatDefs - These are rematerializable def MIs which are not deleted.
+  SmallSet<MachineInstr*, 4> ReMatDefs;
+
+  // Clear kill info.
+  SmallSet<unsigned, 2> KilledMIRegs;
+
+  // Keep track of the registers we have already spilled in case there are
+  // multiple defs of the same register in MI.
+  SmallSet<unsigned, 8> SpilledMIRegs;
+
+  RegKills.reset();
+  KillOps.clear();
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  DistanceMap.clear();
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E; ) {
+    MachineBasicBlock::iterator NextMII = llvm::next(MII);
+
+    if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
+      NextMII = llvm::next(MII);
+
+    if (InsertEmergencySpills(MII))
+      NextMII = llvm::next(MII);
+
+    InsertRestores(MII, Spills, RegKills, KillOps);
+
+    if (InsertSpills(MII))
+      NextMII = llvm::next(MII);
+
+    bool Erased = false;
+    bool BackTracked = false;
+    MachineInstr &MI = *MII;
+
+    // Remember DbgValue's which reference stack slots.
+    if (MI.isDebugValue() && MI.getOperand(0).isFI())
+      Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
+
+    /// ReusedOperands - Keep track of operand reuse in case we need to undo
+    /// reuse.
+    ReuseInfo ReusedOperands(MI, TRI);
+    SmallVector<unsigned, 4> VirtUseOps;
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isReg() || MO.getReg() == 0)
+        continue; // Ignore non-register operands.
+
+      unsigned VirtReg = MO.getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+        // Ignore physregs for spilling, but remember that it is used by this
+        // function.
+        MRI->setPhysRegUsed(VirtReg);
+        continue;
+      }
+
+      // We want to process implicit virtual register uses first.
+      if (MO.isImplicit())
+        // If the virtual register is implicitly defined, emit an implicit_def
+        // before it so the scavenger knows it's "defined".
+        // FIXME: This is a horrible hack done by the register allocator to
+        // remat a definition with virtual register operand.
+        VirtUseOps.insert(VirtUseOps.begin(), i);
+      else
+        VirtUseOps.push_back(i);
+    }
+
+    // Process all of the spilled uses and all non-spilled reg references.
+    SmallVector<int, 2> PotentialDeadStoreSlots;
+    KilledMIRegs.clear();
+    for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+      unsigned i = VirtUseOps[j];
+      unsigned VirtReg = MI.getOperand(i).getReg();
+      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+             "Not a virtual register?");
+
+      unsigned SubIdx = MI.getOperand(i).getSubReg();
+      if (VRM->isAssignedReg(VirtReg)) {
+        // This virtual register was assigned a physreg!
+        unsigned Phys = VRM->getPhys(VirtReg);
+        MRI->setPhysRegUsed(Phys);
+        if (MI.getOperand(i).isDef())
+          ReusedOperands.markClobbered(Phys);
+        substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+        if (VRM->isImplicitlyDefined(VirtReg))
+          // FIXME: Is this needed?
+          BuildMI(*MBB, &MI, MI.getDebugLoc(),
+                  TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+        continue;
+      }
+
+      // This virtual register is now known to be a spilled value.
+      if (!MI.getOperand(i).isUse())
+        continue; // Handle defs in the loop below (handle use&def here though)
+
+      bool AvoidReload = MI.getOperand(i).isUndef();
+      // Check if it is defined by an implicit def. It should not be spilled.
+      // Note, this is for correctness reasons. e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled: you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG, into target registers that overlap.
+      bool DoReMat = VRM->isReMaterialized(VirtReg);
+      int SSorRMId = DoReMat
+        ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+      int ReuseSlot = SSorRMId;
+
+      // Check to see if this stack slot is available.
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+      // If this is a sub-register use, make sure the reuse register is in the
+      // right register class. For example, for x86 not all of the 32-bit
+      // registers have accessible sub-registers.
+      // Similarly so for EXTRACT_SUBREG. Consider this:
+      // EDI = op
+      // MOV32_mr fi#1, EDI
+      // ...
+      //       = EXTRACT_SUBREG fi#1
+      // fi#1 is available in EDI, but it cannot be reused because it's not in
+      // the right register file.
+      if (PhysReg && !AvoidReload && SubIdx) {
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        if (!RC->contains(PhysReg))
+          PhysReg = 0;
+      }
+
+      if (PhysReg && !AvoidReload) {
+        // This spilled operand might be part of a two-address operand. If this
+        // is the case, then changing it will necessarily require changing the
+        // def part of the instruction as well. However, in some cases, we
+        // aren't allowed to modify the reused register. If none of these cases
+        // apply, reuse it.
+        bool CanReuse = true;
+        bool isTied = MI.isRegTiedToDefOperand(i);
+        if (isTied) {
+          // Okay, we have a two address operand. We can reuse this physreg as
+          // long as we are allowed to clobber the value and there isn't an
+          // earlier def that has already clobbered the physreg.
+          CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+            Spills.canClobberPhysReg(PhysReg);
+        }
+        // If this is an asm, and a PhysReg alias is used elsewhere as an
+        // earlyclobber operand, we can't also use it as an input.
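+        // e.g. (hypothetical) inline asm that early-clobbers R0 cannot also
+        // reuse R0 (or an alias of it) for this input operand.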
+        if (MI.isInlineAsm()) {
+          for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+            MachineOperand &MOk = MI.getOperand(k);
+            if (MOk.isReg() && MOk.isEarlyClobber() &&
+                TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+              CanReuse = false;
+              DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+                    << " for vreg" << VirtReg << ": " << MOk << '\n');
+              break;
+            }
+          }
+        }
+
+        if (CanReuse) {
+          // If this stack slot value is already available, reuse it!
+          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+            DEBUG(dbgs() << "Reusing RM#"
+                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+          else
+            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+          DEBUG(dbgs() << " from physreg "
+                << TRI->getName(PhysReg) << " for vreg"
+                << VirtReg << " instead of reloading into physreg "
+                << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+          MI.getOperand(i).setReg(RReg);
+          MI.getOperand(i).setSubReg(0);
+
+          // The only technical detail we have is that we don't know that
+          // PhysReg won't be clobbered by a reloaded stack slot that occurs
+          // later in the instruction. In particular, consider 'op V1, V2'.
+          // If V1 is available in physreg R0, we would choose to reuse it
+          // here, instead of reloading it into the register the allocator
+          // indicated (say R1). However, V2 might have to be reloaded
+          // later, and it might indicate that it needs to live in R0. When
+          // this occurs, we need to have information available that
+          // indicates it is safe to use R1 for the reload instead of R0.
+          //
+          // To further complicate matters, we might conflict with an alias,
+          // or R0 and R1 might not be compatible with each other. In this
+          // case, we actually insert a reload for V1 in R1, ensuring that
+          // we can get at R0 or its alias.
+          ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+                                  VRM->getPhys(VirtReg), VirtReg);
+          if (isTied)
+            // Only mark it clobbered if this is a use&def operand.
+            ReusedOperands.markClobbered(PhysReg);
+          ++NumReused;
+
+          if (MI.getOperand(i).isKill() &&
+              ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+            // The store of this spilled value is potentially dead, but we
+            // won't know for certain until we've confirmed that the re-use
+            // above is valid, which means waiting until the other operands
+            // are processed. For now we just track the spill slot, we'll
+            // remove it after the other operands are processed if valid.
+
+            PotentialDeadStoreSlots.push_back(ReuseSlot);
+          }
+
+          // Mark it isKill if there are no other uses of the same virtual
+          // register and it's not a two-address operand. IsKill will be
+          // unset if reg is reused.
+          if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+            MI.getOperand(i).setIsKill();
+            KilledMIRegs.insert(VirtReg);
+          }
+
+          continue;
+        }  // CanReuse
+
+        // Otherwise we have a situation where we have a two-address instruction
+        // whose mod/ref operand needs to be reloaded. This reload is already
+        // available in some register "PhysReg", but if we used PhysReg as the
+        // operand to our 2-addr instruction, the instruction would modify
+        // PhysReg. This isn't cool if something later uses PhysReg and expects
+        // to get its initial value.
+        //
+        // To avoid this problem, and to avoid doing a load right after a store,
+        // we emit a copy from PhysReg into the designated register for this
+        // operand.
+        //
+        // This case also applies to an earlyclobber'd PhysReg.
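+        // Sketch (hypothetical registers): the reload value lives in R0 but
+        // the two-address def would clobber R0, so emit 'R1 = COPY R0' and
+        // rewrite the operand to use R1.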
+ unsigned DesignatedReg = VRM->getPhys(VirtReg); + assert(DesignatedReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + DesignatedReg = ReusedOperands. + GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, + MaybeDeadStores, RegKills, KillOps, *VRM); + + // If the mapped designated register is actually the physreg we have + // incoming, we don't need to inserted a dead copy. + if (DesignatedReg == PhysReg) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DEBUG(dbgs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); + else + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + ReusedOperands.markClobbered(RReg); + ++NumReused; + continue; + } + + MRI->setPhysRegUsed(DesignatedReg); + ReusedOperands.markClobbered(DesignatedReg); + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), + DesignatedReg).addReg(PhysReg); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); + UpdateKills(*CopyMI, TRI, RegKills, KillOps); + + // This invalidates DesignatedReg. + Spills.ClobberPhysReg(DesignatedReg); + + Spills.addAvailable(ReuseSlot, DesignatedReg); + unsigned RReg = + SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + DEBUG(dbgs() << '\t' << *prior(MII)); + ++NumReused; + continue; + } // if (PhysReg) + + // Otherwise, reload it and remember that we have it. + PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, *VRM); + + MRI->setPhysRegUsed(PhysReg); + ReusedOperands.markClobbered(PhysReg); + if (AvoidReload) + ++NumAvoided; + else { + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); + } else { + const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); + TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); + MachineInstr *LoadMI = prior(InsertLoc); + VRM->addSpillSlotUse(SSorRMId, LoadMI); + ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); + } + // This invalidates PhysReg. + Spills.ClobberPhysReg(PhysReg); + + // Any stores to this stack slot are not dead anymore. + if (!DoReMat) + MaybeDeadStores[SSorRMId] = NULL; + Spills.addAvailable(SSorRMId, PhysReg); + // Assumes this is the last use. 
IsKill will be unset if reg is reused + // unless it's a two-address operand. + if (!MI.isRegTiedToDefOperand(i) && + KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); + } + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + } + + // Ok - now we can remove stores that have been confirmed dead. + for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { + // This was the last use and the spilled value is still available + // for reuse. That means the spill was unnecessary! + int PDSSlot = PotentialDeadStoreSlots[j]; + MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; + if (DeadStore) { + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(DeadStore); + MBB->erase(DeadStore); + MaybeDeadStores[PDSSlot] = NULL; + ++NumDSE; + } + } + + + DEBUG(dbgs() << '\t' << MI); + + + // If we have folded references to memory operands, make sure we clear all + // physical registers that may contain the value of the spilled virtual + // register + + // Copy the folded virts to a small vector, we may change MI2VirtMap. + SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts; + // C++0x FTW! + for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator, + VirtRegMap::MI2VirtMapTy::const_iterator> FVRange = + VRM->getFoldedVirts(&MI); + FVRange.first != FVRange.second; ++FVRange.first) + FoldedVirts.push_back(FVRange.first->second); + + SmallSet<int, 2> FoldedSS; + for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { + unsigned VirtReg = FoldedVirts[FVI].first; + VirtRegMap::ModRef MR = FoldedVirts[FVI].second; + DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); + + int SS = VRM->getStackSlot(VirtReg); + if (SS == VirtRegMap::NO_STACK_SLOT) + continue; + FoldedSS.insert(SS); + DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); + + // If this folded instruction is just a use, check to see if it's a + // straight load from the virt reg slot. + if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { + int FrameIdx; + unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); + if (DestReg && FrameIdx == SS) { + // If this spill slot is available, turn it into a copy (or nothing) + // instead of leaving it as a load! + if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { + DEBUG(dbgs() << "Promoted Load To Copy: " << MI); + if (DestReg != InReg) { + MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); + MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DestReg, RegState::Define, DefMO->getSubReg()) + .addReg(InReg, RegState::Kill); + // Revisit the copy so we make sure to notice the effects of the + // operation on the destreg (either needing to RA it if it's + // virtual or needing to clobber any values if it's physical). + NextMII = CopyMI; + NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); + BackTracked = true; + } else { + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + // Unset last kill since it's being reused. 
+ InvalidateKill(InReg, TRI, RegKills, KillOps); + Spills.disallowClobberPhysReg(InReg); + } + + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + goto ProcessNextInst; + } + } else { + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + SmallVector<MachineInstr*, 4> NewMIs; + if (PhysReg && + TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ + MBB->insert(MII, NewMIs[0]); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + --NextMII; // backtrack to the unfolded instruction. + BackTracked = true; + goto ProcessNextInst; + } + } + } + + // If this reference is not a use, any previous store is now dead. + // Otherwise, the store to this stack slot is not dead anymore. + MachineInstr* DeadStore = MaybeDeadStores[SS]; + if (DeadStore) { + bool isDead = !(MR & VirtRegMap::isRef); + MachineInstr *NewStore = NULL; + if (MR & VirtRegMap::isModRef) { + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + SmallVector<MachineInstr*, 4> NewMIs; + // We can reuse this physreg as long as we are allowed to clobber + // the value and there isn't an earlier def that has already clobbered + // the physreg. + if (PhysReg && + !ReusedOperands.isClobbered(PhysReg) && + Spills.canClobberPhysReg(PhysReg) && + !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! + MachineOperand *KillOpnd = + DeadStore->findRegisterUseOperand(PhysReg, true); + // Note, if the store is storing a sub-register, it's possible the + // super-register is needed below. + if (KillOpnd && !KillOpnd->getSubReg() && + TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ + MBB->insert(MII, NewMIs[0]); + NewStore = NewMIs[1]; + MBB->insert(MII, NewStore); + VRM->addSpillSlotUse(SS, NewStore); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + --NextMII; + --NextMII; // backtrack to the unfolded instruction. + BackTracked = true; + isDead = true; + ++NumSUnfold; + } + } + } + + if (isDead) { // Previous store is dead. + // If we get here, the store is dead, nuke it now. + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(DeadStore); + MBB->erase(DeadStore); + if (!NewStore) + ++NumDSE; + } + + MaybeDeadStores[SS] = NULL; + if (NewStore) { + // Treat this store as a spill merged into a copy. That makes the + // stack slot value available. + VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod); + goto ProcessNextInst; + } + } + + // If the spill slot value is available, and this is a new definition of + // the value, the value is not available anymore. + if (MR & VirtRegMap::isMod) { + // Notice that the value in this stack slot has been modified. + Spills.ModifyStackSlotOrReMat(SS); + + // If this is *just* a mod of the value, check to see if this is just a + // store to the spill slot (i.e. the spill got merged into the copy). If + // so, realize that the vreg is available now, and add the store to the + // MaybeDeadStore info. 
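+        // e.g. (illustrative): a spill folded into a copy becomes
+        // 'store R0 -> fi#3'; fi#3 is then also available in R0, and the
+        // store itself may later prove dead.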
+ int StackSlot; + if (!(MR & VirtRegMap::isRef)) { + if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { + assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && + "Src hasn't been allocated yet?"); + + if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot, + Spills, RegKills, KillOps, TRI)) { + NextMII = llvm::next(MII); + BackTracked = true; + goto ProcessNextInst; + } + + // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark + // this as a potentially dead store in case there is a subsequent + // store into the stack slot without a read from it. + MaybeDeadStores[StackSlot] = &MI; + + // If the stack slot value was previously available in some other + // register, change it now. Otherwise, make the register + // available in PhysReg. + Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); + } + } + } + } + + // Process all of the spilled defs. + SpilledMIRegs.clear(); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!(MO.isReg() && MO.getReg() && MO.isDef())) + continue; + + unsigned VirtReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + // Also check if it's copying from an "undef", if so, we can't + // eliminate this or else the undef marker is lost and it will + // confuses the scavenger. This is extremely rare. + if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && + MI.getNumOperands() == 2) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + SmallVector<unsigned, 2> KillRegs; + InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); + if (MO.isDead() && !KillRegs.empty()) { + // Source register or an implicit super/sub-register use is killed. + assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); + // Last def is now dead. + TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); + } + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } + + // If it's not a no-op copy, it clobbers the value in the destreg. + Spills.ClobberPhysReg(VirtReg); + ReusedOperands.markClobbered(VirtReg); + + // Check to see if this instruction is a load from a stack slot into + // a register. If so, this provides the stack slot value in the reg. + int FrameIdx; + if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { + assert(DestReg == VirtReg && "Unknown load situation!"); + + // If it is a folded reference, then it's not safe to clobber. + bool Folded = FoldedSS.count(FrameIdx); + // Otherwise, if it wasn't available, remember that it is now! + Spills.addAvailable(FrameIdx, DestReg, !Folded); + goto ProcessNextInst; + } + + continue; + } + + unsigned SubIdx = MO.getSubReg(); + bool DoReMat = VRM->isReMaterialized(VirtReg); + if (DoReMat) + ReMatDefs.insert(&MI); + + // The only vregs left are stack slot definitions. + int StackSlot = VRM->getStackSlot(VirtReg); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + + // If this def is part of a two-address operand, make sure to execute + // the store from the correct physical register. 
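+      // e.g. (illustrative): if the def is tied to a use that was rewritten
+      // to AX and the def writes through a sub-register index, the store must
+      // be issued from the super-register found via findSuperReg.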
+ unsigned PhysReg;
+ unsigned TiedOp;
+ if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ if (SubIdx) {
+ unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+ assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+ "Can't find corresponding super-register!");
+ PhysReg = SuperReg;
+ }
+ } else {
+ PhysReg = VRM->getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+ }
+ }
+
+ assert(PhysReg && "VR not assigned a physical register?");
+ MRI->setPhysRegUsed(PhysReg);
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ ReusedOperands.markClobbered(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
+ LastStore, Spills, ReMatDefs, RegKills, KillOps);
+ NextMII = llvm::next(MII);
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ if (MI.isIdentityCopy()) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM->RemoveMachineInstrFromMaps(&MI);
+ MBB->erase(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ ProcessNextInst:
+ // Delete dead instructions without side effects.
+ if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM->RemoveMachineInstrFromMaps(&MI);
+ MBB->erase(&MI);
+ Erased = true;
+ }
+ if (!Erased)
+ DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
+ if (!Erased && !BackTracked) {
+ for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+ UpdateKills(*II, TRI, RegKills, KillOps);
+ }
+ MII = NextMII;
+ }
+
+}
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
+ switch (RewriterOpt) {
+ default: llvm_unreachable("Unreachable!");
+ case local:
+ return new LocalRewriter();
+ case trivial:
+ return new TrivialRewriter();
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.h b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 0000000..93474e0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,32 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineFunction;
+ class VirtRegMap;
+
+ /// VirtRegRewriter interface: Implementations of this interface assign
+ /// spilled virtual registers to stack slots, rewriting the code.
+ struct VirtRegRewriter {
+ virtual ~VirtRegRewriter();
+ virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) = 0;
+ };
+
+ /// createVirtRegRewriter - Create and return a rewriter object, as specified
+ /// on the command line.
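+ /// The factory returns a heap-allocated object, so callers own the
+ /// rewriter and typically run it once per function, e.g.:
+ ///   std::auto_ptr<VirtRegRewriter> R(createVirtRegRewriter());
+ ///   R->runOnMachineFunction(MF, VRM, LIs);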
+ VirtRegRewriter* createVirtRegRewriter();
+
+}
+
+#endif
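Stepping back from the patch: the AvailableSpills bookkeeping that the local rewriter leans on throughout (addAvailable, ClobberPhysReg, getSpillSlotOrReMatPhysReg) reduces to a pair of mutually inverse maps. A toy, self-contained sketch of that structure, not the LLVM class:

#include <cstdio>
#include <map>

class ToyAvailableSpills {
  std::map<int, unsigned> SlotToReg; // stack slot -> physreg holding it
  std::map<unsigned, int> RegToSlot; // reverse map, used for clobbering
public:
  // Record that Reg now holds the value of Slot (e.g. after a reload,
  // or after a store that leaves the value live in the register).
  void addAvailable(int Slot, unsigned Reg) {
    clobberPhysReg(Reg);             // a reg holds at most one slot value
    auto Old = SlotToReg.find(Slot);
    if (Old != SlotToReg.end())      // slot value moved to a new register
      RegToSlot.erase(Old->second);
    SlotToReg[Slot] = Reg;
    RegToSlot[Reg] = Slot;
  }
  // A def of Reg destroys whatever slot value it was holding.
  void clobberPhysReg(unsigned Reg) {
    auto It = RegToSlot.find(Reg);
    if (It == RegToSlot.end()) return;
    SlotToReg.erase(It->second);
    RegToSlot.erase(It);
  }
  // Returns 0 (no physreg) if the slot value is not in a register.
  unsigned getSpillSlotPhysReg(int Slot) const {
    auto It = SlotToReg.find(Slot);
    return It == SlotToReg.end() ? 0u : It->second;
  }
};

int main() {
  ToyAvailableSpills Spills;
  Spills.addAvailable(/*Slot=*/3, /*Reg=*/17);
  std::printf("%u\n", Spills.getSpillSlotPhysReg(3)); // 17: reuse, no reload
  Spills.clobberPhysReg(17);                          // a later def of reg 17
  std::printf("%u\n", Spills.getSpillSlotPhysReg(3)); // 0: must reload
  return 0;
}

Keeping the reverse map is what makes clobbering a register on every physreg def cheap; without it, each def would have to scan all tracked slots.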