author    | dim <dim@FreeBSD.org> | 2011-07-17 15:36:56 +0000
committer | dim <dim@FreeBSD.org> | 2011-07-17 15:36:56 +0000
commit    | 1176aa52646fe641a4243a246aa7f960c708a274 (patch)
tree      | c8086addb211fa670a9d2b1038d8c2e453229755 /lib/CodeGen
parent    | ece02cd5829cea836e9365b0845a8ef042d17b0a (diff)
download  | FreeBSD-src-1176aa52646fe641a4243a246aa7f960c708a274.zip
          | FreeBSD-src-1176aa52646fe641a4243a246aa7f960c708a274.tar.gz
Vendor import of llvm trunk r135360:
http://llvm.org/svn/llvm-project/llvm/trunk@135360
Diffstat (limited to 'lib/CodeGen')
93 files changed, 5162 insertions, 4743 deletions
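
A pattern that recurs throughout the diff below is the replacement of TargetRegisterClass::allocation_order_begin/end iteration (plus a hand-maintained AllocatableSet bitvector) with the new RegisterClassInfo helper, which returns the allocation order as an ArrayRef<unsigned>. A minimal sketch of the before/after shape — not taken from the commit itself; tryRegister is a hypothetical callback, and the includes mirror the ones the diff adds:

#include "RegisterClassInfo.h"   // CodeGen-internal header used by this revision
#include "llvm/ADT/ArrayRef.h"

// Pre-r135360 style: raw iterators plus an explicit allocatability test.
//   for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
//        RE = RC->allocation_order_end(MF); R != RE; ++R)
//     if (AllocatableSet.test(*R))
//       tryRegister(*R);

// Style after this import: index into the precomputed order.
static void scanOrder(const llvm::RegisterClassInfo &RegClassInfo,
                      const llvm::TargetRegisterClass *RC,
                      void (*tryRegister)(unsigned)) {  // hypothetical callback
  llvm::ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
  for (unsigned i = 0; i != Order.size(); ++i)
    if (RegClassInfo.isAllocatable(Order[i]))
      tryRegister(Order[i]);
}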
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index dca1d29..25842a7 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "post-RA-sched"
#include "AggressiveAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -114,12 +115,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
AggressiveAntiDepBreaker::
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs) :
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF)),
+ RegClassInfo(RCI),
State(NULL) {
/* Collect a bitset of all registers that are only broken if they are on the critical path. */
@@ -402,7 +404,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -477,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -618,9 +620,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
const TargetRegisterClass *SuperRC = TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
- const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
- const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
- if (RB == RE) {
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
return false;
}
@@ -628,17 +629,17 @@
DEBUG(dbgs() << "\tFind Registers:");
if (RenameOrder.count(SuperRC) == 0)
- RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
- const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
- const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
- TargetRegisterClass::iterator R = OrigR;
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
do {
- if (R == RB) R = RE;
+ if (R == 0) R = Order.size();
--R;
- const unsigned NewSuperReg = *R;
+ const unsigned NewSuperReg = Order[R];
// Don't consider non-allocatable registers
- if (!AllocatableSet.test(NewSuperReg)) continue;
+ if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -819,7 +820,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg)) {
+ if (!RegClassInfo.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index b7ddafc..7067784 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,13 +23,15 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
+class RegisterClassInfo;
+
/// Class AggressiveAntiDepState
/// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
@@ -117,11 +119,7 @@ namespace llvm {
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
-
- /// AllocatableSet - The set of allocatable registers.
- /// We'll be ignoring anti-dependencies on non-allocatable registers,
- /// because they may not be safe to break.
- const BitVector AllocatableSet;
+ const RegisterClassInfo &RegClassInfo;
/// CriticalPathSet - The set of registers that should only be
/// renamed if they are on the critical path.
@@ -133,7 +131,8 @@ namespace llvm {
public:
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs);
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
@@ -158,8 +157,8 @@ namespace llvm {
void FinishBlock();
private:
- typedef std::map<const TargetRegisterClass *,
- TargetRegisterClass::const_iterator> RenameOrderType;
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
/// IsImplicitDefUse - Return true if MO represents a register
/// that is both implicitly used and defined in MI
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index a8ee2b6..1005f10 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -41,21 +41,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
if (HintPair.first) {
const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
// The remaining allocation order may depend on the hint.
- const unsigned *B, *E;
- tie(B, E) = TRI.getAllocationOrder(RC, HintPair.first, Hint,
- VRM.getMachineFunction());
-
- // Empty allocation order?
- if (B == E)
+ ArrayRef<unsigned> Order =
+ TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
+ VRM.getMachineFunction());
+ if (Order.empty())
return;
// Copy the allocation order with reserved registers removed.
OwnedBegin = true;
- unsigned *P = new unsigned[E - B];
+ unsigned *P = new unsigned[Order.size()];
Begin = P;
- for (; B != E; ++B)
- if (!RCI.isReserved(*B))
- *P++ = *B;
+ for (unsigned i = 0; i != Order.size(); ++i)
+ if (!RCI.isReserved(Order[i]))
+ *P++ = Order[i];
End = P;
// Target-dependent hints require resolution.
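
The rewritten rename loop in AggressiveAntiDepBreaker above swaps register-class iterators for plain unsigned indices. The wrap-around scan it encodes — start just before a remembered position, walk backwards, wrap past zero, stop on returning to the start — reads more easily in isolation. A self-contained sketch of just that control flow (illustrative only; a toy predicate stands in for the real rename checks):

#include <cstdio>
#include <vector>

// Walk Order backwards in a circle, starting just before OrigR and
// stopping once the scan comes back around to its starting point.
static int findCandidate(const std::vector<unsigned> &Order, unsigned OrigR) {
  unsigned EndR = (OrigR == Order.size()) ? 0 : OrigR;
  unsigned R = OrigR;
  do {
    if (R == 0) R = Order.size();   // wrap around
    --R;
    if (Order[R] % 2 == 0)          // toy stand-in for the real suitability tests
      return (int)Order[R];
  } while (R != EndR);
  return -1;                        // no suitable register found
}

int main() {
  std::vector<unsigned> Order = {3, 5, 8, 7};
  std::printf("candidate: %d\n", findCandidate(Order, Order.size()));
  return 0;
}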
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 161afba..7f314ee 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -575,6 +575,8 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
} else if (MI->getOperand(0).isImm()) {
OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
} else {
assert(MI->getOperand(0).isReg() && "Unknown operand type");
if (MI->getOperand(0).getReg() == 0) {
@@ -1211,9 +1213,9 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list for which emitUsedDirectiveFor
/// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
// Should be an array of 'i8*'.
- ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (InitList == 0) return;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1226,11 +1228,11 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
/// function pointers, ignoring the init priority.
-void AsmPrinter::EmitXXStructorList(Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority, which we ignore.
if (!isa<ConstantArray>(List)) return;
- ConstantArray *InitList = cast<ConstantArray>(List);
+ const ConstantArray *InitList = cast<ConstantArray>(List);
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
@@ -1516,6 +1518,13 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
unsigned AddrSpace, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
}
static void EmitGlobalConstantStruct(const ConstantStruct *CS,
@@ -1925,7 +1934,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return false;
// The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *PI;
+ MachineBasicBlock *Pred = *PI;
if (!Pred->isLayoutSuccessor(MBB))
return false;
@@ -1934,9 +1943,28 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
if (Pred->empty())
return true;
- // Otherwise, check the last instruction.
- const MachineInstr &LastInst = Pred->back();
- return !LastInst.getDesc().isBarrier();
+ // Check the terminators in the previous blocks
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ return false;
+
+ // If we are the operands of one of the branches, this is not
+ // a fall through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c6166e2..5ac455e 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
@@ -111,7 +112,16 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI));
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
+
+ // FIXME: It would be nice if we can avoid createing a new instance of
+ // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state. e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index bff1a35..1fe035e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -491,7 +491,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
}
/// addConstantValue - Add constant value entry in variable DIE.
-bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI,
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
bool Unsigned) {
unsigned CIBitWidth = CI->getBitWidth();
if (CIBitWidth <= 64) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 60a9b28..213c7fc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -181,7 +181,7 @@ public:
/// addConstantValue - Add constant value entry in variable DIE.
bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
- bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
/// addConstantFPValue - Add constant value entry in variable DIE.
bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8845bfa..125e1e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -229,6 +229,7 @@ public:
void DbgScope::dump() const {
raw_ostream &err = dbgs();
err.indent(IndentLevel);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
const MDNode *N = Desc;
N->dump();
if (AbstractScope)
@@ -618,6 +619,21 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
return ScopeDIE;
}
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
@@ -718,6 +734,11 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
else if (DVInsn->getOperand(0).isFPImm())
updated = VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ VariableCU->addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ isUnsignedDIType(DV->getType()));
} else {
VariableCU->addVariableAddress(DV, VariableDie, Asm->getDebugValueLocation(DVInsn));
@@ -913,22 +934,6 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
return I->second;
}
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DIType Ty) {
- DIDerivedType DTy(Ty);
- if (DTy.Verify())
- return isUnsignedDIType(DTy.getTypeDerivedFrom());
-
- DIBasicType BTy(Ty);
- if (BTy.Verify()) {
- unsigned Encoding = BTy.getEncoding();
- if (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char)
- return true;
- }
- return false;
-}
-
// Return const exprssion if value is a GEP to access merged global
// constant. e.g.
// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1017,7 +1022,7 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
} else {
TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (ConstantInt *CI =
+ } else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
@@ -1310,7 +1315,6 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
void
DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
- const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
@@ -1320,11 +1324,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
DIVariable DV(Var);
const std::pair<unsigned, DebugLoc> &VP = VI->second;
- DbgScope *Scope = 0;
- if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
- Scope = ConcreteScopes.lookup(IA);
- if (Scope == 0)
- Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+ DbgScope *Scope = findDbgScope(VP.second);
// If variable scope is not found then skip this variable.
@@ -1351,6 +1351,34 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
}
+/// getDebugLocEntry - Get .debug_loc entry for the instraction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
+ return DotDebugLocEntry();
+}
+
/// collectVariableInfo - Populate DbgScope entries with variables' info.
void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
@@ -1379,7 +1407,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
DISubprogram(DV.getContext()).describes(MF->getFunction()))
Scope = CurrentFnDbgScope;
else
- Scope = findDbgScope(MInsn);
+ Scope = findDbgScope(MInsn->getDebugLoc());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -1424,6 +1452,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SLabel = FunctionEndSym;
else {
const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
if (End->isDebugValue())
SLabel = getLabelBeforeInsn(End);
else {
@@ -1435,25 +1465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// The value is valid until the next DBG_VALUE or clobber.
- MachineLocation MLoc;
- if (Begin->getNumOperands() == 3) {
- if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) {
- MLoc.set(Begin->getOperand(0).getReg(),
- Begin->getOperand(1).getImm());
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
- }
- // FIXME: Handle isFPImm also.
- else if (Begin->getOperand(0).isImm()) {
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(FLabel, SLabel,
- Begin->getOperand(0).getImm()));
- }
- } else {
- MLoc = Asm->getDebugValueLocation(Begin);
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
- }
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
}
@@ -1550,8 +1562,12 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
- const MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
if (WScope)
@@ -1560,22 +1576,12 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
DbgScopeMap.insert(std::make_pair(Scope, WScope));
if (DIDescriptor(Scope).isLexicalBlock()) {
DbgScope *Parent =
- getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
+ getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
WScope->setParent(Parent);
Parent->addScope(WScope);
- }
-
- if (!WScope->getParent()) {
- StringRef SPName = DISubprogram(Scope).getLinkageName();
- // We used to check only for a linkage name, but that fails
- // since we began omitting the linkage name for private
- // functions. The new way is to check for the name in metadata,
- // but that's not supported in old .ll test cases. Ergo, we
- // check both.
- if (SPName == Asm->MF->getFunction()->getName() ||
- DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
- CurrentFnDbgScope = WScope;
- }
+ } else if (DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(Asm->MF->getFunction()))
+ CurrentFnDbgScope = WScope;
return WScope;
}
@@ -1587,37 +1593,14 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
- DILocation DL(InlinedAt);
+ InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
DbgScope *Parent =
- getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
+ getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
WScope->setParent(Parent);
Parent->addScope(WScope);
-
- ConcreteScopes[InlinedAt] = WScope;
-
return WScope;
}
-/// hasValidLocation - Return true if debug location entry attached with
-/// machine instruction encodes valid location info.
-static bool hasValidLocation(LLVMContext &Ctx,
- const MachineInstr *MInsn,
- const MDNode *&Scope, const MDNode *&InlinedAt) {
- DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown()) return false;
-
- const MDNode *S = DL.getScope(Ctx);
-
- // There is no need to create another DIE for compile unit. For all
- // other scopes, create one DbgScope now. This will be translated
- // into a scope DIE at the end.
- if (DIScope(S).isCompileUnit()) return false;
-
- Scope = S;
- InlinedAt = DL.getInlinedAt(Ctx);
- return true;
-}
-
/// calculateDominanceGraph - Calculate dominance graph for DbgScope
/// hierarchy.
static void calculateDominanceGraph(DbgScope *Scope) {
@@ -1648,21 +1631,24 @@ static void calculateDominanceGraph(DbgScope *Scope) {
/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
static
-void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+void printDbgScopeInfo(const MachineFunction *MF,
DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) {
#ifndef NDEBUG
+ LLVMContext &Ctx = MF->getFunction()->getContext();
unsigned PrevDFSIn = 0;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- const MDNode *Scope = NULL;
- const MDNode *InlinedAt = NULL;
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
// Check if instruction has valid location information.
- if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+ DebugLoc MIDL = MInsn->getDebugLoc();
+ if (!MIDL.isUnknown()) {
+ MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
dbgs() << " [ ";
if (InlinedAt)
dbgs() << "*";
@@ -1692,11 +1678,9 @@ bool DwarfDebug::extractScopeInformation() {
return false;
// Scan each instruction and create scopes. First build working set of scopes.
- LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
SmallVector<DbgRange, 4> MIRanges;
DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
- const MDNode *PrevScope = NULL;
- const MDNode *PrevInlinedAt = NULL;
+ DebugLoc PrevDL;
const MachineInstr *RangeBeginMI = NULL;
const MachineInstr *PrevMI = NULL;
for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- const MDNode *Scope = NULL;
- const MDNode *InlinedAt = NULL;
// Check if instruction has valid location information.
- if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
PrevMI = MInsn;
continue;
}
// If scope has not changed then skip this instruction.
- if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+ if (MIDL == PrevDL) {
PrevMI = MInsn;
continue;
}
@@ -1727,9 +1710,13 @@
// If we have alread seen a beginning of a instruction range and
// current instruction scope does not match scope of first instruction
// in this range then create a new instruction range.
+ DEBUG(dbgs() << "Creating new instruction range :\n");
+ DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
+ DEBUG(dbgs() << "End Range at " << *PrevMI);
+ DEBUG(dbgs() << "Next Range starting at " << *MInsn);
+ DEBUG(dbgs() << "------------------------\n");
DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
- PrevInlinedAt);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
MIRanges.push_back(R);
}
@@ -1738,16 +1725,15 @@ bool DwarfDebug::extractScopeInformation() {
// Reset previous markers.
PrevMI = MInsn;
- PrevScope = Scope;
- PrevInlinedAt = InlinedAt;
+ PrevDL = MIDL;
}
}
// Create last instruction range.
- if (RangeBeginMI && PrevMI && PrevScope) {
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
DbgRange R(RangeBeginMI, PrevMI);
MIRanges.push_back(R);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
}
if (!CurrentFnDbgScope)
@@ -1755,7 +1741,7 @@ bool DwarfDebug::extractScopeInformation() {
calculateDominanceGraph(CurrentFnDbgScope);
if (PrintDbgScope)
- printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+ printDbgScopeInfo(Asm->MF, MI2ScopeMap);
// Find ranges of instructions covered by each DbgScope;
DbgScope *PrevDbgScope = NULL;
@@ -1842,8 +1828,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
- /// ProcessedArgs - Collection of arguments already processed.
- SmallPtrSet<const MDNode *, 8> ProcessedArgs;
const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
/// LiveUserVar - Map physreg numbers to the MDNode they contain.
std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
@@ -1883,8 +1867,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (Prev->isDebugValue()) {
// Coalesce identical entries at the end of History.
if (History.size() >= 2 &&
- Prev->isIdenticalTo(History[History.size() - 2]))
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
History.pop_back();
+ }
// Terminate old register assignments that don't reach MI;
MachineFunction::const_iterator PrevMBB = Prev->getParent();
@@ -1894,9 +1882,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// its basic block.
MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr();
- if (LastMI == PrevMBB->end())
+ if (LastMI == PrevMBB->end()) {
// Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
History.pop_back();
+ }
else {
// Terminate after LastMI.
History.push_back(LastMI);
@@ -2053,10 +2044,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DbgVariableToFrameIndexMap.clear();
VarToAbstractVarMap.clear();
DbgVariableToDbgInstMap.clear();
+ InlinedDbgScopeMap.clear();
DeleteContainerSeconds(DbgScopeMap);
UserVariables.clear();
DbgValues.clear();
- ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
AbstractScopesList.clear();
AbstractVariables.clear();
@@ -2083,22 +2074,17 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
return true;
}
-/// findDbgScope - Find DbgScope for the debug loc attached with an
-/// instruction.
-DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
- DbgScope *Scope = NULL;
- LLVMContext &Ctx =
- MInsn->getParent()->getParent()->getFunction()->getContext();
- DebugLoc DL = MInsn->getDebugLoc();
-
+/// findDbgScope - Find DbgScope for the debug loc.
+DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
if (DL.isUnknown())
- return Scope;
+ return NULL;
- if (const MDNode *IA = DL.getInlinedAt(Ctx))
- Scope = ConcreteScopes.lookup(IA);
- if (Scope == 0)
+ DbgScope *Scope = NULL;
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ if (MDNode *IA = DL.getInlinedAt(Ctx))
+ Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ else
Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
return Scope;
}
@@ -2597,56 +2583,61 @@ void DwarfDebug::emitDebugLoc() {
MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
Asm->EmitLabelDifference(end, begin, 2);
Asm->OutStreamer.EmitLabel(begin);
- if (Entry.isConstant()) {
+ if (Entry.isInt()) {
DIBasicType BTy(DV.getType());
if (BTy.Verify() &&
(BTy.getEncoding() == dwarf::DW_ATE_signed
|| BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
Asm->OutStreamer.AddComment("DW_OP_consts");
Asm->EmitInt8(dwarf::DW_OP_consts);
- Asm->EmitSLEB128(Entry.getConstant());
+ Asm->EmitSLEB128(Entry.getInt());
} else {
Asm->OutStreamer.AddComment("DW_OP_constu");
Asm->EmitInt8(dwarf::DW_OP_constu);
- Asm->EmitULEB128(Entry.getConstant());
+ Asm->EmitULEB128(Entry.getInt());
}
- } else if (DV.hasComplexAddress()) {
- unsigned N = DV.getNumAddrElements();
- unsigned i = 0;
- if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
- if (Entry.Loc.getOffset()) {
- i = 2;
- Asm->EmitDwarfRegOp(Entry.Loc);
- Asm->OutStreamer.AddComment("DW_OP_deref");
- Asm->EmitInt8(dwarf::DW_OP_deref);
- Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
- Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
- Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
} else {
- // If first address element is OpPlus then emit
- // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
- Asm->EmitDwarfRegOp(Loc);
- i = 2;
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref)
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ else llvm_unreachable("unknown Opcode found in complex address");
}
- } else {
- Asm->EmitDwarfRegOp(Entry.Loc);
- }
-
- // Emit remaining complex address elements.
- for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == DIBuilder::OpPlus) {
- Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
- Asm->EmitULEB128(DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref)
- Asm->EmitInt8(dwarf::DW_OP_deref);
- else llvm_unreachable("unknown Opcode found in complex address");
}
- } else {
- // Regular entry.
- Asm->EmitDwarfRegOp(Entry.Loc);
}
+ // else ... ignore constant fp. There is not any good way to
+ // to represent them here in dwarf.
Asm->OutStreamer.EmitLabel(end);
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abda2e6..b245006 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -69,17 +69,35 @@ typedef struct DotDebugLocEntry {
const MDNode *Variable;
bool Merged;
bool Constant;
- int64_t iConstant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
DotDebugLocEntry()
: Begin(0), End(0), Variable(0), Merged(false),
- Constant(false), iConstant(0) {}
+ Constant(false) { Constants.Int = 0;}
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
const MDNode *V)
: Begin(B), End(E), Loc(L), Variable(V), Merged(false),
- Constant(false), iConstant(0) {}
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
: Begin(B), End(E), Variable(0), Merged(false),
- Constant(true), iConstant(i) {}
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
/// Empty entries are also used as a trigger to emit temp label. Such
/// labels are referenced is used to find debug_loc offset for a given DIE.
@@ -91,8 +109,13 @@ typedef struct DotDebugLocEntry {
Next->Begin = Begin;
Merged = true;
}
- bool isConstant() { return Constant; }
- int64_t getConstant() { return iConstant; }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
} DotDebugLocEntry;
//===----------------------------------------------------------------------===//
@@ -178,12 +201,10 @@ class DwarfDebug {
/// DbgScopeMap - Tracks the scopes in the current function. Owns the
/// contained DbgScope*s.
- ///
DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
- /// ConcreteScopes - Tracks the concrete scopees in the current function.
- /// These scopes are also included in DbgScopeMap.
- DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
+ /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
+ DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;
/// AbstractScopes - Tracks the abstract scopes a module. These scopes are
/// not included DbgScopeMap. AbstractScopes owns its DbgScope*s.
@@ -296,7 +317,7 @@ private:
void assignAbbrevNumber(DIEAbbrev &Abbrev);
/// getOrCreateDbgScope - Create DbgScope for the scope.
- DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
+ DbgScope *getOrCreateDbgScope(DebugLoc DL);
DbgScope *getOrCreateAbstractScope(const MDNode *N);
@@ -427,9 +448,8 @@ private:
/// is found. Update FI to hold value of the index.
bool findVariableFrameIndex(const DbgVariable *V, int *FI);
- /// findDbgScope - Find DbgScope for the debug loc attached with an
- /// instruction.
- DbgScope *findDbgScope(const MachineInstr *MI);
+ /// findDbgScope - Find DbgScope for the debug loc.
+ DbgScope *findDbgScope(DebugLoc DL);
/// identifyScopeMarkers() - Indentify instructions that are marking
/// beginning of or end of a scope.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 967a278..1f992fa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -512,6 +512,8 @@ void DwarfException::EmitExceptionTable() {
SizeAlign = 0;
}
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
// SjLj Exception handling
if (IsSJLJ) {
Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
@@ -525,14 +527,30 @@
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
const CallSiteEntry &S = *I;
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(idx) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
+ llvm::utostr(idx));
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" Action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- Asm->EmitULEB128(idx, "Landing pad");
+ Asm->EmitULEB128(idx);
// Offset of the first associated action record, relative to the start of
// the action table. This value is biased by 1 (1 indicates the start of
// the action table), and 0 indicates that there are no actions.
- Asm->EmitULEB128(S.Action, "Action");
+ Asm->EmitULEB128(S.Action);
}
} else {
// DWARF Exception handling
@@ -562,6 +580,7 @@ void DwarfException::EmitExceptionTable() {
// Add extra padding if it wasn't added to the TType base offset.
Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+ unsigned Entry = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
@@ -576,19 +595,38 @@ void DwarfException::EmitExceptionTable() {
if (EndLabel == 0)
EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+
+ if (!S.PadLabel) {
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ } else {
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" On action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
- Asm->OutStreamer.AddComment("Region start");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-
- Asm->OutStreamer.AddComment("Region length");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
-
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- Asm->OutStreamer.AddComment("Landing pad");
if (!S.PadLabel)
Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
else
@@ -597,45 +635,63 @@ void DwarfException::EmitExceptionTable() {
// Offset of the first associated action record, relative to the start of
// the action table. This value is biased by 1 (1 indicates the start of
// the action table), and 0 indicates that there are no actions.
- Asm->EmitULEB128(S.Action, "Action");
+ Asm->EmitULEB128(S.Action);
}
}
// Emit the Action Table.
- if (Actions.size() != 0) {
- Asm->OutStreamer.AddComment("-- Action Record Table --");
- Asm->OutStreamer.AddBlankLine();
- }
-
+ int Entry = 0;
for (SmallVectorImpl<ActionEntry>::const_iterator
I = Actions.begin(), E = Actions.end(); I != E; ++I) {
const ActionEntry &Action = *I;
- Asm->OutStreamer.AddComment("Action Record");
- Asm->OutStreamer.AddBlankLine();
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+ llvm::utostr(++Entry) + " <<");
+ if (Action.ValueForTypeID >= 0)
+ Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
+ llvm::utostr(NextAction));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
// Type Filter
//
// Used by the runtime to match the type of the thrown exception to the
// type of the catch clauses or the types in the exception specification.
- Asm->EmitSLEB128(Action.ValueForTypeID, " TypeInfo index");
+ Asm->EmitSLEB128(Action.ValueForTypeID);
// Action Record
//
// Self-relative signed displacement in bytes of the next action record,
// or 0 if there is no next action record.
- Asm->EmitSLEB128(Action.NextAction, " Next action");
+ Asm->EmitSLEB128(Action.NextAction);
}
// Emit the Catch TypeInfos.
- if (!TypeInfos.empty()) {
- Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
}
+
for (std::vector<const GlobalVariable *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
-
- Asm->OutStreamer.AddComment("TypeInfo");
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
if (GV)
Asm->EmitReference(GV, TTypeEncoding);
else
@@ -644,14 +700,21 @@ void DwarfException::EmitExceptionTable() {
}
// Emit the Exception Specifications.
- if (!FilterIds.empty()) {
- Asm->OutStreamer.AddComment("-- Filter IDs --");
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
}
for (std::vector<unsigned>::const_iterator
I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
unsigned TypeID = *I;
- Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
}
Asm->EmitAlignment(2);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 719cd26..99090a8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -108,6 +108,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
// Remove the block.
MF->erase(MBB);
}
@@ -171,6 +174,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MachineModuleInfo *mmi) {
if (!tii) return false;
+ TriedMerging.clear();
+
TII = tii;
TRI = tri;
MMI = mmi;
@@ -361,11 +366,31 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
++NumTailMerge;
}
@@ -394,16 +419,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// For targets that use the register scavenger, we must maintain LiveIns.
- if (RS) {
- RS->enterBasicBlock(&CurMBB);
- if (!CurMBB.empty())
- RS->forward(prior(CurMBB.end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
- if (RegsLiveAtExit[i])
- NewMBB->addLiveIn(i);
- }
+ MaintainLiveIns(&CurMBB, NewMBB);
return NewMBB;
}
@@ -416,10 +432,10 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const TargetInstrDesc &TID = I->getDesc();
- if (TID.isCall())
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall())
Time += 10;
- else if (TID.mayLoad() || TID.mayStore())
+ else if (MCID.mayLoad() || MCID.mayStore())
Time += 2;
else
++Time;
@@ -799,14 +815,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// First find blocks with no successors.
MergePotentials.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
if (I->succ_empty())
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
// See if we can do any tail merging on those.
- if (MergePotentials.size() < TailMergeThreshold &&
- MergePotentials.size() >= 2)
+ if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
@@ -830,15 +853,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I) {
- if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ if (I->pred_size() >= 2) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
E2 = I->pred_end();
- P != E2; ++P) {
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
// Skip blocks that loop to themselves, can't tail merge these.
if (PBB == IBB)
continue;
@@ -891,6 +916,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
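
The BranchFolding changes above bound how much work tail merging spends on very large functions: merge candidates are collected only up to TailMergeThreshold, and once a full batch has been examined its blocks are recorded in TriedMerging so later sweeps over the same function skip them. Reduced to a standalone sketch of just that guard — container and block types are simplified, and the threshold value is an assumption for illustration:

#include <cstddef>
#include <set>
#include <vector>

struct Block { int id; };

static const std::size_t TailMergeThreshold = 150; // assumed cap for this sketch

static void collectCandidates(const std::vector<Block*> &Blocks,
                              std::set<const Block*> &TriedMerging,
                              std::vector<Block*> &MergePotentials) {
  // Stop collecting once a full batch has been gathered.
  for (std::size_t i = 0; i != Blocks.size() &&
                          MergePotentials.size() < TailMergeThreshold; ++i) {
    if (TriedMerging.count(Blocks[i]))
      continue;                      // already examined in an earlier full batch
    MergePotentials.push_back(Blocks[i]);
  }
  // If this is a large problem, avoid revisiting the same blocks next time.
  if (MergePotentials.size() == TailMergeThreshold)
    for (std::size_t i = 0; i != MergePotentials.size(); ++i)
      TriedMerging.insert(MergePotentials[i]);
}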
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4daf4ec..df795df 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include <vector>
@@ -47,6 +48,7 @@ namespace llvm {
};
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
class SameTailElt {
MPIterator MPIter;
@@ -93,6 +95,8 @@ namespace llvm {
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index c726d92..06d2a95 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -33,6 +33,8 @@ add_llvm_library(LLVMCodeGen
LocalStackSlotAllocation.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
+ MachineBlockFrequency.cpp
+ MachineBranchProbabilityInfo.cpp
MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
@@ -58,7 +60,6 @@ add_llvm_library(LLVMCodeGen
Passes.cpp
PeepholeOptimizer.cpp
PostRASchedulerList.cpp
- PreAllocSplitting.cpp
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
@@ -78,7 +79,6 @@ add_llvm_library(LLVMCodeGen
ScoreboardHazardRecognizer.cpp
ShadowStackGC.cpp
ShrinkWrapping.cpp
- SimpleRegisterCoalescing.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
Spiller.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 5d722ee..e6b3bbc 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -188,6 +188,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
@@ -202,8 +203,11 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
// TRI doesn't have accurate enough information to model this yet.
if (I.getOperand().getSubReg())
return;
+ // Inline asm instuctions don't remember their constraints.
+ if (I->isInlineAsm())
+ return;
const TargetRegisterClass *OpRC =
- I->getDesc().getRegClass(I.getOperandNo(), TRI);
+ TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
if (OpRC)
NewRC = getCommonSubClass(NewRC, OpRC);
if (!NewRC || NewRC == OldRC)
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 515e6f9..489746c 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -37,13 +37,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeOptimizePHIsPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
- initializePreAllocSplittingPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
initializeRALinScanPass(Registry);
- initializeRegisterCoalescerAnalysisGroup(Registry);
+ initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
- initializeSimpleRegisterCoalescingPass(Registry);
initializeSlotIndexesPass(Registry);
initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 51d984f..84c4d59 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -27,12 +27,12 @@ using namespace llvm;
CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi) :
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF)),
+ RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
DefIndices(TRI->getNumRegs(), 0) {}
@@ -207,7 +207,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -295,7 +295,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -385,11 +385,9 @@
CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
unsigned LastNewReg,
const TargetRegisterClass *RC) {
- for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
- RE = RC->allocation_order_end(MF); R != RE; ++R) {
- unsigned NewReg = *R;
- // Don't consider non-allocatable registers
- if (!AllocatableSet.test(NewReg)) continue;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
@@ -534,7 +532,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg))
+ if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.count(AntiDepReg))
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 5bbb8f5..0710780 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,6 +17,7 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,6 +28,7 @@
#include <map>
namespace llvm {
+class RegisterClassInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -35,6 +37,7 @@ class TargetRegisterInfo;
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
/// AllocatableSet - The set of allocatable registers.
/// We'll be ignoring anti-dependencies on non-allocatable registers,
@@ -66,7 +69,7 @@ class TargetRegisterInfo;
SmallSet<unsigned, 4> KeepRegs;
public:
- CriticalAntiDepBreaker(MachineFunction& MFi);
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
~CriticalAntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index fdc1d91..6de6c0c 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -110,9 +110,14 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs.set(Reg);
}
- // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs
- // are not live across blocks, but some targets (x86) can have flags live
- // out of a block.
+ // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 22c5465..03604b0 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -336,8 +336,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
CallInst *NewSelector =
- CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
- "eh.sel.catch.all", II);
+ CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
NewSelector->setTailCall(II->isTailCall());
NewSelector->setAttributes(II->getAttributes());
@@ -497,10 +496,8 @@ bool DwarfEHPrepare::LowerUnwindsAndResumes() {
// Find the rewind function if we didn't already.
if (!RewindFunction) { LLVMContext &Ctx = ResumeInsts[0]->getContext(); - std::vector<const Type*> - Params(1, Type::getInt8PtrTy(Ctx)); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Params, false); + Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); } diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index fa2319b..d977651 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -659,11 +659,11 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the /// function pointers, ignoring the init priority. -void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { +void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) { // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. if (List->isNullValue()) return; - ConstantArray *InitList = cast<ConstantArray>(List); + const ConstantArray *InitList = cast<ConstantArray>(List); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { if (InitList->getOperand(i)->isNullValue()) continue; diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index b8bac55..6f7fbac 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -232,7 +232,7 @@ namespace llvm { void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, ELFSection &GblS, int64_t Offset = 0); bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - void EmitXXStructorList(Constant *List, ELFSection &Xtor); + void EmitXXStructorList(const Constant *List, ELFSection &Xtor); void EmitRelocations(); void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index 646e014..a7aba89 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -39,7 +39,7 @@ void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { MF = &mf; EC.clear(); - EC.grow(2 * MF->size()); + EC.grow(2 * MF->getNumBlockIDs()); for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index ebc2fc9..a67140e 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -62,8 +62,8 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MBBI++; // If MI is a pseudo, expand it. 
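// EmitXXStructorList above walks llvm.global_ctors/llvm.global_dtors, whose
// initializer is an array of { i32 priority, void ()* fn } structs; only the
// function pointer matters here. A sketch of that walk with the same cast
// discipline as the hunk; the Visit callback is illustrative:

#include "llvm/Constants.h"

static void forEachStructor(const llvm::Constant *List,
                            void (*Visit)(const llvm::Constant *Fn)) {
  if (List->isNullValue()) return;   // An empty list is a null initializer.
  const llvm::ConstantArray *InitList = llvm::cast<llvm::ConstantArray>(List);
  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
    const llvm::Constant *Entry = InitList->getOperand(i);
    if (Entry->isNullValue()) continue;
    // Operand 0 is the i32 init priority (ignored); operand 1 is the function.
    Visit(llvm::cast<llvm::ConstantStruct>(Entry)->getOperand(1));
  }
}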
- const TargetInstrDesc &TID = MI->getDesc(); - if (TID.usesCustomInsertionHook()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.usesCustomInsertionHook()) { Changed = true; MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 8b2c981..6cb2277 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -18,11 +18,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -173,10 +174,10 @@ namespace { private: bool ReverseBranchCondition(BBInfo &BBI); bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - float Prediction, float Confidence) const; + const BranchProbability &Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - float Prediction, float Confidence) const; + const BranchProbability &Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -203,19 +204,19 @@ namespace { bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Cycle, unsigned Extra, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, - Prediction, Confidence); + Prediction); } bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TCycle, unsigned TExtra, MachineBasicBlock &FBB, unsigned FCycle, unsigned FExtra, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { return TCycle > 0 && FCycle > 0 && TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction, Confidence); + Prediction); } // blockAlwaysFallThrough - Block ends without a terminator. @@ -450,7 +451,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -461,7 +462,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, - Prediction, Confidence)) + Prediction)) return false; Dups = TrueBBI.NonPredSize; } @@ -477,7 +478,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, /// if performed in 'Dups'. 
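// The EdgeBundles fix above matters because bundles are indexed by block
// *number*, not block count: every BB#N owns two equivalence-class nodes, one
// for its ingoing side and one for its outgoing side, so the container must be
// sized by getNumBlockIDs() (numbers can be sparse after block deletion). The
// id scheme in one line, mirroring EdgeBundles::getBundle in this revision:

static unsigned bundleNode(unsigned BlockNum, bool Out) {
  return 2 * BlockNum + (Out ? 1 : 0);   // Hence EC.grow(2 * getNumBlockIDs()).
}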
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -499,8 +500,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, - Prediction, Confidence)) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction)) return false; Dups = Size; } @@ -651,12 +651,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (I->isDebugValue()) continue; - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isNotDuplicable()) + const MCInstrDesc &MCID = I->getDesc(); + if (MCID.isNotDuplicable()) BBI.CannotBeCopied = true; bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); + bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch(); if (!isCondBr) { if (!isPredicated) { @@ -751,8 +751,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, ScanInstructions(BBI); - // Unanalyzable or ends with fallthrough or unconditional branch. - if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) { + // Unanalyzable or ends with fallthrough or unconditional branch, or if is not + // considered for ifcvt anymore. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) { BBI.IsBeingAnalyzed = false; BBI.IsAnalyzed = true; return BBI; @@ -795,21 +796,20 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, // - backedge -> 90% taken // - early exit -> 20% taken // - branch predictor confidence -> 90% - float Prediction = 0.5f; - float Confidence = 0.9f; + BranchProbability Prediction(5, 10); MachineLoop *Loop = MLI->getLoopFor(BB); if (Loop) { if (TrueBBI.BB == Loop->getHeader()) - Prediction = 0.9f; + Prediction = BranchProbability(9, 10); else if (FalseBBI.BB == Loop->getHeader()) - Prediction = 0.1f; + Prediction = BranchProbability(1, 10); MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); if (!TrueLoop || TrueLoop->getParentLoop() == Loop) - Prediction = 0.2f; + Prediction = BranchProbability(2, 10); else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) - Prediction = 0.8f; + Prediction = BranchProbability(8, 10); } if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && @@ -817,7 +817,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, TrueBBI.ExtraCost), TrueBBI.ExtraCost2, *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, - Prediction, Confidence) && + Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -833,9 +833,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - TrueBBI.ExtraCost2, Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -848,17 +848,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && + if (ValidTriangle(TrueBBI, FalseBBI, true, 
Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - TrueBBI.ExtraCost2, Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && + if (ValidSimple(TrueBBI, Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - TrueBBI.ExtraCost2, Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -874,29 +874,29 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, - 1.0-Prediction, Confidence) && + Prediction.getCompl()) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, - 1.0-Prediction, Confidence) && + Prediction.getCompl()) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && + if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1414,9 +1414,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { - const TargetInstrDesc &TID = I->getDesc(); + const MCInstrDesc &MCID = I->getDesc(); // Do not copy the end of the block branches. - if (IgnoreBr && TID.isBranch()) + if (IgnoreBr && MCID.isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 19ae333..5547f73 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -180,11 +180,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass, /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, /// otherwise return 0. static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { - if (!MI->isCopy()) - return 0; - if (MI->getOperand(0).getSubReg() != 0) - return 0; - if (MI->getOperand(1).getSubReg() != 0) + if (!MI->isFullCopy()) return 0; if (MI->getOperand(0).getReg() == Reg) return MI->getOperand(1).getReg(); @@ -307,7 +303,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Best spill candidate seen so far. This must dominate UseVNI. 
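// isFullCopyOf above now defers to MachineInstr::isFullCopy(), which folds the
// three tests the old code spelled out. An equivalent predicate, restated from
// the removed lines as a sketch:

#include "llvm/CodeGen/MachineInstr.h"

static bool isFullCopy(const llvm::MachineInstr *MI) {
  return MI->isCopy() &&                     // A COPY instruction...
         !MI->getOperand(0).getSubReg() &&   // ...defining a whole register...
         !MI->getOperand(1).getSubReg();     // ...from a whole register.
}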
SibValueInfo SVI(UseReg, UseVNI); MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def); - unsigned SpillDepth = Loops.getLoopDepth(UseMBB); + MachineBasicBlock *SpillMBB = UseMBB; + unsigned SpillDepth = Loops.getLoopDepth(SpillMBB); bool SeenOrigPHI = false; // Original PHI met. do { @@ -320,7 +317,30 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Is this value a better spill candidate? if (!isRegToSpill(Reg)) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { + if (MBB == SpillMBB) { + // This is an alternative def earlier in the same MBB. + // Hoist the spill as far as possible in SpillMBB. This can ease + // register pressure: + // + // x = def + // y = use x + // s = copy x + // + // Hoisting the spill of s to immediately after the def removes the + // interference between x and y: + // + // x = def + // spill x + // y = use x<kill> + // + if (VNI->def < SVI.SpillVNI->def) { + DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": " + << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def + << '\n'); + SVI.SpillReg = Reg; + SVI.SpillVNI = VNI; + } + } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { // This is a valid spill location dominating UseVNI. // Prefer to spill at a smaller loop depth. unsigned Depth = Loops.getLoopDepth(MBB); @@ -329,6 +349,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, << ':' << VNI->id << '@' << VNI->def << '\n'); SVI.SpillReg = Reg; SVI.SpillVNI = VNI; + SpillMBB = MBB; SpillDepth = Depth; } } @@ -429,6 +450,7 @@ void InlineSpiller::analyzeSiblingValues() { // Check possible sibling copies. if (VNI->isPHIDef() || VNI->getCopy()) { VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + assert(OrigVNI && "Def outside original live range"); if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index b1014a9..a09bb39 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "InterferenceCache.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -40,9 +41,18 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { E = RoundRobin; if (++RoundRobin == CacheEntries) RoundRobin = 0; - Entries[E].reset(PhysReg, LIUArray, TRI, MF); - PhysRegEntries[PhysReg] = E; - return &Entries[E]; + for (unsigned i = 0; i != CacheEntries; ++i) { + // Skip entries that are in use. + if (Entries[E].hasRefs()) { + if (++E == CacheEntries) + E = 0; + continue; + } + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; + } + llvm_unreachable("Ran out of interference cache entries."); } /// revalidate - LIU contents have changed, update tags. @@ -59,6 +69,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI, const MachineFunction *MF) { + assert(!hasRefs() && "Cannot reset cache entry with references"); // LIU's changed, invalidate cache. ++Tag; PhysReg = physReg; diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 6c36fa4..7f0a27a 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -43,6 +43,9 @@ class InterferenceCache { /// change. 
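// InterferenceCache::get above may no longer evict an Entry that a live Cursor
// still references, so the round-robin eviction scan skips pinned slots and
// only gives up when every slot is pinned. The scan in miniature; the hasRefs
// callback stands in for Entry::hasRefs():

static unsigned findFreeSlot(unsigned Start, unsigned CacheEntries,
                             bool (*hasRefs)(unsigned)) {
  unsigned E = Start;
  for (unsigned i = 0; i != CacheEntries; ++i) {
    if (!hasRefs(E))
      return E;              // First unpinned slot wins.
    if (++E == CacheEntries)
      E = 0;                 // Wrap around, round-robin style.
  }
  return ~0u;                // All pinned: the real code calls llvm_unreachable.
}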
unsigned Tag; + /// RefCount - The total number of Cursor instances referring to this Entry. + unsigned RefCount; + /// MF - The current function. MachineFunction *MF; @@ -68,9 +71,10 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), Indexes(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {} void clear(MachineFunction *mf, SlotIndexes *indexes) { + assert(!hasRefs() && "Cannot clear cache entry with references"); PhysReg = 0; MF = mf; Indexes = indexes; @@ -78,6 +82,10 @@ class InterferenceCache { unsigned getPhysReg() const { return PhysReg; } + void addRef(int Delta) { RefCount += Delta; } + + bool hasRefs() const { return RefCount > 0; } + void revalidate(); /// valid - Return true if this is a valid entry for physReg. @@ -122,15 +130,48 @@ public: void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, const TargetRegisterInfo *); + /// getMaxCursors - Return the maximum number of concurrent cursors that can + /// be supported. + unsigned getMaxCursors() const { return CacheEntries; } + /// Cursor - The primary query interface for the block interference cache. class Cursor { Entry *CacheEntry; BlockInterference *Current; + + void setEntry(Entry *E) { + Current = 0; + // Update reference counts. Nothing happens when RefCount reaches 0, so + // we don't have to check for E == CacheEntry etc. + if (CacheEntry) + CacheEntry->addRef(-1); + CacheEntry = E; + if (CacheEntry) + CacheEntry->addRef(+1); + } + public: - /// Cursor - Create a cursor for the interference allocated to PhysReg and - /// all its aliases. - Cursor(InterferenceCache &Cache, unsigned PhysReg) - : CacheEntry(Cache.get(PhysReg)), Current(0) {} + /// Cursor - Create a dangling cursor. + Cursor() : CacheEntry(0), Current(0) {} + ~Cursor() { setEntry(0); } + + Cursor(const Cursor &O) : CacheEntry(0), Current(0) { + setEntry(O.CacheEntry); + } + + Cursor &operator=(const Cursor &O) { + setEntry(O.CacheEntry); + return *this; + } + + /// setPhysReg - Point this cursor to PhysReg's interference. + void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { + // Release reference before getting a new one. That guarantees we can + // actually have CacheEntries live cursors. + setEntry(0); + if (PhysReg) + setEntry(Cache.get(PhysReg)); + } /// moveTo - Move cursor to basic block MBBNum. void moveToBlock(unsigned MBBNum) { diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 3861dda..611886f 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -29,7 +29,7 @@ static void EnsureFunctionExists(Module &M, const char *Name, ArgIt ArgBegin, ArgIt ArgEnd, const Type *RetTy) { // Insert a correctly-typed definition now. - std::vector<const Type *> ParamTys; + std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) ParamTys.push_back(I->getType()); M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); @@ -69,7 +69,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, // program already contains a function with this name. Module *M = CI->getParent()->getParent()->getParent(); // Get or insert the definition now. 
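// The Cursor rewrite above is an intrusive-refcount handle: every retarget
// funnels through setEntry, which releases the old entry before pinning the
// new one, so copy construction, assignment, and destruction stay balanced
// automatically. A self-contained miniature of the pattern:

struct Entry {
  unsigned RefCount;
  Entry() : RefCount(0) {}
};

class Handle {
  Entry *E;
  void set(Entry *NewE) {
    if (E) --E->RefCount;    // Release first; safe even when NewE == E,
    E = NewE;                // since nothing destructive happens when the
    if (E) ++E->RefCount;    // count touches zero.
  }
public:
  Handle() : E(0) {}
  Handle(const Handle &O) : E(0) { set(O.E); }
  Handle &operator=(const Handle &O) { set(O.E); return *this; }
  ~Handle() { set(0); }
};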
- std::vector<const Type *> ParamTys; + std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) ParamTys.push_back((*I)->getType()); Constant* FCache = M->getOrInsertFunction(NewFn, @@ -77,7 +77,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, IRBuilder<> Builder(CI->getParent(), CI); SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); - CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end()); + CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); @@ -353,6 +353,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { report_fatal_error("Code generator does not support intrinsic function '"+ Callee->getName()+"'!"); + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + Value *V = CI->getArgOperand(0); + CI->replaceAllUsesWith(V); + break; + } + // The setjmp/longjmp intrinsics should only exist in the code if it was // never optimized (ie, right out of the CFE), or if it has been hacked on // by the lowerinvoke pass. In both cases, the right thing to do is to @@ -546,14 +553,13 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { !CI->getType()->isIntegerTy()) return false; - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty) return false; // Okay, we can do this xform, do so now. - const Type *Tys[] = { Ty }; Module *M = CI->getParent()->getParent()->getParent(); - Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); Op = CallInst::Create(Int, Op, CI->getName(), CI); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 589d0a9..f985af8 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -24,10 +24,14 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" @@ -98,10 +102,10 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -LLVMTargetMachine::LLVMTargetMachine(const Target &T, - const std::string &Triple) - : TargetMachine(T), TargetTriple(Triple) { - AsmInfo = T.createAsmInfo(TargetTriple); +LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS) + : TargetMachine(T, Triple, CPU, FS) { + AsmInfo = T.createMCAsmInfo(Triple); } // Set the default code model for the JIT for a generic target. @@ -136,14 +140,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI); + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. 
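// The new Intrinsic::expect case above treats llvm.expect as a pure branch
// hint: lowering forwards the observed value and discards the expected
// constant. The rewrite as a free-standing sketch (erasing the dead call here
// is an assumption; in the pass the now-dead call is cleaned up after the
// switch):

#include "llvm/Instructions.h"

static void lowerExpect(llvm::CallInst *CI) {
  llvm::Value *Exp = CI->getArgOperand(0);   // __builtin_expect(exp, c) ==> exp.
  CI->replaceAllUsesWith(Exp);               // The hint carries no semantics.
  CI->eraseFromParent();
}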
MCCodeEmitter *MCE = 0; TargetAsmBackend *TAB = 0; if (ShowMCEncoding) { - MCE = getTarget().createCodeEmitter(*this, *Context); - TAB = getTarget().createAsmBackend(TargetTriple); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context); + TAB = getTarget().createAsmBackend(getTargetTriple()); } MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, @@ -159,13 +164,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); - TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, + *Context); + TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple()); if (MCE == 0 || TAB == 0) return true; - AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, - *TAB, Out, MCE, + AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), + *Context, *TAB, Out, MCE, hasMCRelaxAll(), hasMCNoExecStack())); AsmStreamer.get()->InitSections(); @@ -240,13 +247,14 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx); - TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx); + TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple()); if (MCE == 0 || TAB == 0) return true; OwningPtr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx, + AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx, *TAB, Out, MCE, hasMCRelaxAll(), hasMCNoExecStack())); @@ -384,6 +392,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Expand pseudo-instructions emitted by ISel. PM.add(createExpandISelPseudosPass()); + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. if (OptLevel != CodeGenOpt::None) @@ -412,12 +426,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After codegen peephole optimization pass"); } - // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) printAndVerify(PM, "After PreRegAlloc passes"); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 292928f..5d38c83 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -123,7 +123,7 @@ public: /// getNext - Return the next UserValue in the equivalence class. UserValue *getNext() const { return next; } - /// match - Does this UserValue match the aprameters? + /// match - Does this UserValue match the parameters? 
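// Both emission paths above now assemble the MC pipeline from the subtarget
// rather than from the TargetMachine: the code emitter wants MCInstrInfo plus
// MCSubtargetInfo, and the asm backend is keyed by triple. The shared
// construction sequence, lifted from the hunks (assumed to run inside
// LLVMTargetMachine, with Context an MCContext*):

const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
                                                   *Context);
TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
if (MCE == 0 || TAB == 0)
  return true;   // Target cannot emit object files; both paths fail this way.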
bool match(const MDNode *Var, unsigned Offset) const { return Var == variable && Offset == offset; } diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index b67f966..70003e7 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -244,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { +collectInterferingVRegs(unsigned MaxInterferingRegs) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -287,10 +287,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; - // Stop collecting when the max weight is exceeded. - if (RecentInterferingVReg->weight >= MaxWeight) - return InterferingVRegs.size(); - continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index c83578e..5e78d5e 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -229,8 +229,7 @@ public: // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, - float MaxWeight = HUGE_VALF); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 052abad..b385fb3 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -298,10 +298,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (NumComp <= 1) continue; ++NumFracRanges; + bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + // If LI is an original interval that hasn't been split yet, make the new + // intervals their own originals instead of referring to LI. The original + // interval must contain all the split products, and LI doesn't. 
+ if (IsOriginal) + VRM.setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 68946a2..8f0fb46 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -22,7 +22,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Assembly/Writer.h" @@ -61,7 +60,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } -/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the /// parent pointer of the MBB, the MBB numbering, and any instructions in the /// MBB to be on the right operand list for registers. /// @@ -93,7 +92,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); - + // Add the instruction's register operands to their corresponding // use/def lists. MachineFunction *MF = Parent->getParent(); @@ -110,7 +109,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { // Remove from the use/def lists. N->RemoveRegOperandsFromUseLists(); - + N->setParent(0); LeakDetector::addGarbageObject(N); @@ -339,25 +338,64 @@ void MachineBasicBlock::updateTerminator() { } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { - Successors.push_back(succ); - succ->addPredecessor(this); -} +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { + + // If we see non-zero value for the first time it means we actually use Weight + // list, so we fill all Weights with 0's. + if (weight != 0 && Weights.empty()) + Weights.resize(Successors.size()); + + if (weight != 0 || !Weights.empty()) + Weights.push_back(weight); + + Successors.push_back(succ); + succ->addPredecessor(this); + } void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { succ->removePredecessor(this); succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + Successors.erase(I); } -MachineBasicBlock::succ_iterator +MachineBasicBlock::succ_iterator MachineBasicBlock::removeSuccessor(succ_iterator I) { assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + (*I)->removePredecessor(this); return Successors.erase(I); } +void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, + MachineBasicBlock *New) { + uint32_t weight = 0; + succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(SI); + weight = *WI; + } + + // Update the successor information. 
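// The successor-weight storage introduced above is deliberately lazy: Weights
// stays empty (costing nothing) until the first non-zero weight arrives, at
// which point it is back-filled with zeros so it remains parallel to
// Successors. A self-contained miniature of that invariant:

#include <stdint.h>
#include <vector>

struct SuccList {
  std::vector<int>      Successors;
  std::vector<uint32_t> Weights;   // Either empty or parallel to Successors.

  void add(int Succ, uint32_t W) {
    if (W != 0 && Weights.empty())
      Weights.resize(Successors.size());   // First real weight: back-fill 0s.
    if (W != 0 || !Weights.empty())
      Weights.push_back(W);
    Successors.push_back(Succ);
  }
};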
+  removeSuccessor(SI);
+  addSuccessor(New, weight);
+}
+
 void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
   Predecessors.push_back(pred);
 }
@@ -371,10 +409,17 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {

 void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
   if (this == fromMBB)
     return;
-
+
   while (!fromMBB->succ_empty()) {
     MachineBasicBlock *Succ = *fromMBB->succ_begin();
-    addSuccessor(Succ);
+    uint32_t weight = 0;
+
+
+    // If Weight list is empty it means we don't use it (disabled optimization).
+    if (!fromMBB->Weights.empty())
+      weight = *fromMBB->Weights.begin();
+
+    addSuccessor(Succ, weight);
     fromMBB->removeSuccessor(Succ);
   }
 }
@@ -383,7 +428,7 @@ void
 MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
   if (this == fromMBB)
     return;
-
+
   while (!fromMBB->succ_empty()) {
     MachineBasicBlock *Succ = *fromMBB->succ_begin();
     addSuccessor(Succ);
@@ -637,15 +682,14 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
   }

   // Update the successor information.
-  removeSuccessor(Old);
-  addSuccessor(New);
+  replaceSuccessor(Old, New);
 }

 /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
 /// CFG to be inserted. If we have proven that MBB can only branch to DestA and
 /// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
 /// null.
-///
+///
 /// Besides DestA and DestB, retain other edges leading to LandingPads
 /// (currently there can be only one; we don't check or require that here).
 /// Note it is possible that DestA and/or DestB are LandingPads.
@@ -720,6 +764,26 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
   return DL;
 }

+/// getSuccWeight - Return weight of the edge from this block to succ.
+///
+uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+  if (Weights.empty())
+    return 0;
+
+  succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+  return *getWeightIterator(I);
+}
+
+/// getWeightIterator - Return weight iterator corresponding to the I successor
+/// iterator
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+  assert(Weights.size() == Successors.size() && "Async weight list!");
+  size_t index = std::distance(Successors.begin(), I);
+  assert(index < Weights.size() && "Not a current successor!");
+  return Weights.begin() + index;
+}
+
 void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
                           bool t) {
   OS << "BB#" << MBB->getNumber();
diff --git a/lib/CodeGen/MachineBlockFrequency.cpp b/lib/CodeGen/MachineBlockFrequency.cpp
new file mode 100644
index 0000000..893a320
--- /dev/null
+++ b/lib/CodeGen/MachineBlockFrequency.cpp
@@ -0,0 +1,59 @@
+//====----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequency.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq",
+                      "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq",
+                    "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequency::ID = 0;
+
+
+MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) {
+  initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry());
+  MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+                                MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequency::~MachineBlockFrequency() {
+  delete MBFI;
+}
+
+void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.setPreservesAll();
+}
+
+bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) {
+  MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+  MBFI->doFunction(&F, &MBPI);
+  return false;
+}
+
+/// getBlockFreq - Return block frequency. Never returns 0; the value is always
+/// positive. Please note that the initial frequency is equal to 1024. It means
+/// that we should not rely on the value itself, but only on comparisons with
+/// other block frequencies. We do this to avoid using floating point.
+///
+uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) {
+  return MBFI->getBlockFreq(MBB);
+}
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 0000000..c13fa6b
--- /dev/null
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,113 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) +INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) + +char MachineBranchProbabilityInfo::ID = 0; + +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +uint32_t +MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t Weight = Src->getSuccWeight(Dst); + if (!Weight) + return DEFAULT_WEIGHT; + return Weight; +} + +bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability. + return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +MachineBasicBlock * +MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + MachineBasicBlock *MaxSucc = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +BranchProbability +MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream &MachineBranchProbabilityInfo:: +printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index f97ccf6..3a60a37 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -260,12 +260,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. 
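// isEdgeHot and getHotSucc above decide "probability exceeds 4/5" without ever
// forming a quotient: like BranchProbability itself (and the if-converter
// hunks earlier in this commit), they cross-multiply in 64 bits, so large
// uint32_t weights neither overflow nor suffer float rounding. The test in
// isolation:

#include <stdint.h>

static bool isHot(uint32_t Weight, uint32_t Sum) {
  return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;   // Weight/Sum > 80%.
}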
- const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (TID.mayLoad()) { + if (MCID.mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 50750a5..cd25156 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -152,10 +152,10 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { /// of `new MachineInstr'. /// MachineInstr * -MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID, +MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp) { return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) - MachineInstr(TID, DL, NoImp); + MachineInstr(MCID, DL, NoImp); } /// CloneMachineInstr - Create a new MachineInstr which is a copy of the diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 36b0b83..143a29b 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -15,19 +15,22 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" +#include "llvm/LLVMContext.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Value.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" @@ -194,6 +197,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { getSubReg() == Other.getSubReg(); case MachineOperand::MO_Immediate: return getImm() == Other.getImm(); + case MachineOperand::MO_CImmediate: + return getCImm() == Other.getCImm(); case MachineOperand::MO_FPImmediate: return getFPImm() == Other.getFPImm(); case MachineOperand::MO_MachineBasicBlock: @@ -267,6 +272,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_Immediate: OS << getImm(); break; + case MachineOperand::MO_CImmediate: + getCImm()->getValue().print(OS, false); + break; case MachineOperand::MO_FPImmediate: if (getFPImm()->getType()->isFloatTy()) OS << getFPImm()->getValueAPF().convertToFloat(); @@ -454,9 +462,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { //===----------------------------------------------------------------------===// /// MachineInstr ctor - This constructor creates a dummy MachineInstr with -/// TID NULL and no operands. +/// MCID NULL and no operands. 
MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { // Make sure that we get added to a machine basicblock @@ -464,23 +472,23 @@ MachineInstr::MachineInstr() } void MachineInstr::addImplicitDefUseOperands() { - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs) + if (MCID->ImplicitDefs) + for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses) + if (MCID->ImplicitUses) + for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } /// MachineInstr ctor - This constructor creates a MachineInstr and adds the /// implicit operands. It reserves space for the number of operands specified by -/// the TargetInstrDesc. -MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), +/// the MCInstrDesc. +MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { if (!NoImp) - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -488,13 +496,13 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) } /// MachineInstr ctor - As above, but with a DebugLoc. -MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, +MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp) - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -504,12 +512,12 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. 
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -519,12 +527,12 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr ctor - As above, but with a DebugLoc. /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, - const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + const MCInstrDesc &tid) + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -534,7 +542,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -621,7 +629,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { Operands.back().AddRegOperandToRegInfo(RegInfo); // If the register operand is flagged as early, mark the operand as such unsigned OpNo = Operands.size() - 1; - if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } return; @@ -643,7 +651,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { Operands[OpNo].AddRegOperandToRegInfo(0); // If the register operand is flagged as early, mark the operand as such - if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } @@ -668,7 +676,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { Operands[OpNo].AddRegOperandToRegInfo(RegInfo); // If the register operand is flagged as early, mark the operand as such - if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } @@ -691,7 +699,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { // If the register operand is flagged as early, 
mark the operand as such if (Operands[OpNo].isReg() - && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } } @@ -794,6 +802,11 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, return false; } } + // If DebugLoc does not match then two dbg.values are not identical. + if (isDebugValue()) + if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() + && getDebugLoc() != Other->getDebugLoc()) + return false; return true; } @@ -817,8 +830,8 @@ void MachineInstr::eraseFromParent() { /// OperandComplete - Return true if it's illegal to add a new operand /// bool MachineInstr::OperandsComplete() const { - unsigned short NumOperands = TID->getNumOperands(); - if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) + unsigned short NumOperands = MCID->getNumOperands(); + if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) return true; // Broken: we have all the operands of this instruction! return false; } @@ -826,8 +839,8 @@ bool MachineInstr::OperandsComplete() const { /// getNumExplicitOperands - Returns the number of non-implicit operands. /// unsigned MachineInstr::getNumExplicitOperands() const { - unsigned NumOperands = TID->getNumOperands(); - if (!TID->isVariadic()) + unsigned NumOperands = MCID->getNumOperands(); + if (!MCID->isVariadic()) return NumOperands; for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { @@ -928,10 +941,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, /// operand list that is used to represent the predicate. It returns -1 if /// none is found. int MachineInstr::findFirstPredOperandIdx() const { - const TargetInstrDesc &TID = getDesc(); - if (TID.isPredicable()) { + const MCInstrDesc &MCID = getDesc(); + if (MCID.isPredicable()) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (TID.OpInfo[i].isPredicate()) + if (MCID.OpInfo[i].isPredicate()) return i; } @@ -987,11 +1000,11 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); - const TargetInstrDesc &TID = getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MCInstrDesc &MCID = getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (MO.isReg() && MO.isUse() && - TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) { + MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) { if (UseOpIdx) *UseOpIdx = (unsigned)i; return true; @@ -1047,13 +1060,13 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { return false; } - const TargetInstrDesc &TID = getDesc(); - if (UseOpIdx >= TID.getNumOperands()) + const MCInstrDesc &MCID = getDesc(); + if (UseOpIdx >= MCID.getNumOperands()) return false; const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse()) return false; - int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO); + int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO); if (DefIdx == -1) return false; if (DefOpIdx) @@ -1093,11 +1106,11 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { /// copyPredicates - Copies predicate operand(s) from MI. 
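// The tied-operand helpers above now reduce to one MCInstrDesc query: a use
// operand is tied to a def exactly when its TIED_TO constraint names the def's
// index. The lookup both helpers share, as a sketch:

#include "llvm/MC/MCInstrDesc.h"

static int tiedDefIndex(const llvm::MCInstrDesc &MCID, unsigned UseOpIdx) {
  // Returns the tied def's operand index, or -1 when the use is not tied.
  return MCID.getOperandConstraint(UseOpIdx, llvm::MCOI::TIED_TO);
}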
void MachineInstr::copyPredicates(const MachineInstr *MI) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) { + if (MCID.OpInfo[i].isPredicate()) { // Predicated operands must be last operands. addOperand(MI->getOperand(i)); } @@ -1134,13 +1147,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (TID->mayStore() || TID->isCall()) { + if (MCID->mayStore() || MCID->isCall()) { SawStore = true; return false; } if (isLabel() || isDebugValue() || - TID->isTerminator() || hasUnmodeledSideEffects()) + MCID->isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1148,7 +1161,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (TID->mayLoad() && !isInvariantLoad(AA)) + if (MCID->mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1188,9 +1201,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, /// have no volatile memory references. bool MachineInstr::hasVolatileMemoryRef() const { // An instruction known never to access memory won't have a volatile access. - if (!TID->mayStore() && - !TID->mayLoad() && - !TID->isCall() && + if (!MCID->mayStore() && + !MCID->mayLoad() && + !MCID->isCall() && !hasUnmodeledSideEffects()) return false; @@ -1214,7 +1227,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. - if (!TID->mayLoad()) + if (!MCID->mayLoad()) return false; // If the instruction has lost its memoperands, conservatively assume that @@ -1364,6 +1377,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // Print the rest of the operands. bool OmittedAnyCallClobbers = false; bool FirstOp = true; + unsigned AsmDescOp = ~0u; + unsigned AsmOpCount = 0; if (isInlineAsm()) { // Print asm string. 
@@ -1377,7 +1392,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; - StartOp = InlineAsm::MIOp_FirstOperand; + StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; FirstOp = false; } @@ -1416,10 +1431,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (FirstOp) FirstOp = false; else OS << ","; OS << " "; if (i < getDesc().NumOperands) { - const TargetOperandInfo &TOI = getDesc().OpInfo[i]; - if (TOI.isPredicate()) + const MCOperandInfo &MCOI = getDesc().OpInfo[i]; + if (MCOI.isPredicate()) OS << "pred:"; - if (TOI.isOptionalDef()) + if (MCOI.isOptionalDef()) OS << "opt:"; } if (isDebugValue() && MO.isMetadata()) { @@ -1431,6 +1446,26 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { MO.print(OS, TM); } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + } else if (i == AsmDescOp && MO.isImm()) { + // Pretty print the inline asm operand descriptor. + OS << '$' << AsmOpCount++; + unsigned Flag = MO.getImm(); + switch (InlineAsm::getKind(Flag)) { + case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break; + case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break; + case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break; + case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break; + case InlineAsm::Kind_Imm: OS << ":[imm]"; break; + case InlineAsm::Kind_Mem: OS << ":[mem]"; break; + default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break; + } + + unsigned TiedTo = 0; + if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) + OS << " [tiedto:$" << TiedTo << ']'; + + // Compute the index of the next operand descriptor. + AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else MO.print(OS, TM); } @@ -1685,3 +1720,24 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { } return Hash; } + +void MachineInstr::emitError(StringRef Msg) const { + // Find the source location cookie. 
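The new operand-descriptor printing above consumes one flag word per inline-asm operand group: a kind, a register count that locates the next descriptor, and an optional tied-to link. A toy decoder under an assumed bit layout, for illustration only (low three bits for the kind, the next field for the count; the authoritative encoding and kind values live in llvm/InlineAsm.h):

#include <cstdio>

enum Kind { RegUse = 1, RegDef = 2, RegDefEarlyClobber = 3,
            Clobber = 4, Imm = 5, Mem = 6 };

static unsigned getKind(unsigned Flag) { return Flag & 7; }
static unsigned getNumOperandRegisters(unsigned Flag) {
  return (Flag >> 3) & 0x1fff;
}

int main() {
  unsigned Flag = RegDef | (2u << 3); // a regdef group covering two registers
  printf("$0:[kind %u] spans %u register operands\n",
         getKind(Flag), getNumOperandRegisters(Flag));
  // The printer advances the same way: AsmDescOp += 1 + the register count.
  return 0;
}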
+ unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = getNumOperands(); i != 0; --i) { + if (getOperand(i-1).isMetadata() && + (LocMD = getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + if (const MachineBasicBlock *MBB = getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg); + report_fatal_error(Msg); +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b315702..722ceb2 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,10 +28,10 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" @@ -1018,9 +1018,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /*UnfoldStore=*/false, &LoadRegIndex); if (NewOpc == 0) return 0; - const TargetInstrDesc &TID = TII->get(NewOpc); - if (TID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); + const MCInstrDesc &MID = TII->get(NewOpc); + if (MID.getNumDefs() != 1) return 0; + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 08ff5bb..4b3e64c 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -20,7 +20,6 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()]; UsedPhysRegs.resize(TRI.getNumRegs()); // Create the physreg use/def lists. @@ -38,25 +37,13 @@ MachineRegisterInfo::~MachineRegisterInfo() { "PhysRegUseDefLists has entries after all instructions are deleted"); #endif delete [] PhysRegUseDefLists; - delete [] RegClass2VRegMap; } /// setRegClass - Set the register class of the specified virtual register. /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { - const TargetRegisterClass *OldRC = VRegInfo[Reg].first; VRegInfo[Reg].first = RC; - - // Remove from old register class's vregs list. This may be slow but - // fortunately this operation is rarely needed. - std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector<unsigned>::iterator I = - std::find(VRegs.begin(), VRegs.end(), Reg); - VRegs.erase(I); - - // Add to new register class's vregs list. - RegClass2VRegMap[RC->getID()].push_back(Reg); } const TargetRegisterClass * @@ -95,7 +82,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) // The vector reallocated, handle this now. 
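emitError, completed above, recovers the source-location cookie by walking the operand list from the back and taking the first metadata operand it meets. The same backward scan on plain data; Op and findLocCookie are invented for illustration:

#include <cstdio>

struct Op { bool IsMetadata; unsigned Cookie; };

static unsigned findLocCookie(const Op *Ops, unsigned NumOps) {
  for (unsigned i = NumOps; i != 0; --i)
    if (Ops[i-1].IsMetadata)
      return Ops[i-1].Cookie;
  return 0; // no cookie found: the caller falls back to a fatal error
}

int main() {
  Op Ops[] = { { false, 0 }, { true, 42 }, { false, 0 } };
  printf("srcloc cookie: %u\n", findLocCookie(Ops, 3));
  return 0;
}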
HandleVRegListReallocation(); - RegClass2VRegMap[RegClass->getID()].push_back(Reg); return Reg; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 471463b..7a55852 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -62,6 +62,7 @@ namespace { raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; @@ -255,6 +256,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TM = &MF.getTarget(); + TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -387,8 +389,6 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - // Count the number of landing pad successors. SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -541,19 +541,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { - const TargetInstrDesc &TI = MI->getDesc(); - if (MI->getNumOperands() < TI.getNumOperands()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - *OS << TI.getNumOperands() << " operands expected, but " + *OS << MCID.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !TI.mayLoad()) + if ((*I)->isLoad() && !MCID.mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !TI.mayStore()) + if ((*I)->isStore() && !MCID.mayStore()) report("Missing mayStore flag", MI); } @@ -575,29 +575,30 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); - const TargetInstrDesc &TI = MI->getDesc(); - const TargetOperandInfo &TOI = TI.OpInfo[MONum]; + const MCInstrDesc &MCID = MI->getDesc(); + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; - // The first TI.NumDefs operands must be explicit register defines - if (MONum < TI.getNumDefs()) { + // The first MCID.NumDefs operands must be explicit register defines + if (MONum < MCID.getNumDefs()) { if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); else if (!MO->isDef()) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); - } else if (MONum < TI.getNumOperands()) { + } else if (MONum < MCID.getNumOperands()) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. - if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) { - if (MO->isDef() && !TOI.isOptionalDef()) + if (MO->isReg() && + !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } } else { // ARM adds %reg0 operands to indicate predicates. 
We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -709,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check register classes. - if (MONum < TI.getNumOperands() && !MO->isImplicit()) { + if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { unsigned SubIdx = MO->getSubReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -723,7 +724,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } sr = s; } - if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { if (!DRC->contains(sr)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(sr) << " is not a " @@ -743,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } RC = SRC; } - if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -765,11 +766,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index c105bb0..c523e39 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -353,10 +353,10 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isMoveImmediate()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isMoveImmediate()) return false; - if (TID.getNumDefs() != 1) + if (MCID.getNumDefs() != 1) return false; unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -429,16 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); - if (TID.isBitcast()) { + if (MCID.isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; } - } else if (TID.isCompare()) { + } else if (MCID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. 
Changed = true; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index ba8501f..c73e877 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,6 +22,7 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -37,7 +38,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -52,7 +53,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls"); STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); // Post-RA scheduling is enabled with -// TargetSubtarget.enablePostRAScheduler(). This flag can be used to +// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to // override the target. static cl::opt<bool> EnablePostRAScheduler("post-RA-scheduler", @@ -80,6 +81,7 @@ namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; CodeGenOpt::Level OptLevel; public: @@ -135,7 +137,8 @@ namespace { public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo&, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); ~SchedulePostRATDList(); @@ -179,7 +182,8 @@ namespace { SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo &RCI, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), KillIndices(TRI->getNumRegs()) @@ -189,10 +193,10 @@ SchedulePostRATDList::SchedulePostRATDList( HazardRec = TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); AntiDepBreak = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL)); } SchedulePostRATDList::~SchedulePostRATDList() { @@ -205,9 +209,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + RegClassInfo.runOnMachineFunction(Fn); // Check for explicit enable/disable of post-ra scheduling. 
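Looking back at the MachineVerifier hunk above, the operand checks reduce to two containment tests: DRC->contains(Reg) for physical registers and RC->hasSuperClassEq(DRC) for virtual ones. Modeling a register class as a bitmask of registers makes both tests one line each (the classes and masks below are made up):

#include <cstdio>

typedef unsigned RegMask;
static const RegMask GR32 = 0x0f; // "registers" 0-3
static const RegMask GR64 = 0xff; // "registers" 0-7, a superclass of GR32

static bool contains(RegMask RC, unsigned Reg) {
  return (RC & (1u << Reg)) != 0;
}

// RC satisfies a required class DRC if every register of RC is also in DRC.
static bool hasSuperClassEq(RegMask RC, RegMask DRC) {
  return (RC & ~DRC) == 0;
}

int main() {
  printf("phys reg 5 legal for GR32? %d\n", contains(GR32, 5));
  printf("GR32 vreg ok where GR64 is required? %d\n",
         hasSuperClassEq(GR32, GR64));
  return 0;
}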
- TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) @@ -215,22 +220,23 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. - const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; } // Check for antidep breaking override... if (EnableAntiDepBreaking.getPosition() > 0) { - AntiDepMode = (EnableAntiDepBreaking == "all") ? - TargetSubtarget::ANTIDEP_ALL : - (EnableAntiDepBreaking == "critical") - ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; + AntiDepMode = (EnableAntiDepBreaking == "all") + ? TargetSubtargetInfo::ANTIDEP_ALL + : ((EnableAntiDepBreaking == "critical") + ? TargetSubtargetInfo::ANTIDEP_CRITICAL + : TargetSubtargetInfo::ANTIDEP_NONE); } DEBUG(dbgs() << "PostRAScheduler\n"); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode, CriticalPathRCs); // Loop over all of the basic blocks diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp deleted file mode 100644 index d6e31da..0000000 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ /dev/null @@ -1,1430 +0,0 @@ -//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass. ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the machine instruction level pre-register allocation -// live interval splitting pass. It finds live interval barriers, i.e. -// instructions which will kill all physical registers in certain register -// classes, and splits all live intervals which cross the barrier.
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-alloc-split" -#include "VirtRegMap.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), - cl::Hidden); -static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), - cl::Hidden); - -STATISTIC(NumSplits, "Number of intervals split"); -STATISTIC(NumRemats, "Number of intervals split by rematerialization"); -STATISTIC(NumFolds, "Number of intervals split with spill folding"); -STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding"); -STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers"); -STATISTIC(NumDeadSpills, "Number of dead spills removed"); - -namespace { - class PreAllocSplitting : public MachineFunctionPass { - MachineFunction *CurrMF; - const TargetMachine *TM; - const TargetInstrInfo *TII; - const TargetRegisterInfo* TRI; - MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; - SlotIndexes *SIs; - LiveIntervals *LIs; - LiveStacks *LSs; - VirtRegMap *VRM; - - // Barrier - Current barrier being processed. - MachineInstr *Barrier; - - // BarrierMBB - Basic block where the barrier resides. - MachineBasicBlock *BarrierMBB; - - // BarrierIdx - Current barrier index. - SlotIndex BarrierIdx; - - // CurrLI - Current live interval being split. - LiveInterval *CurrLI; - - // CurrSLI - Current stack slot live interval. - LiveInterval *CurrSLI; - - // CurrSValNo - Current val# for the stack slot live interval. - VNInfo *CurrSValNo; - - // IntervalSSMap - A map from live interval to spill slots. - DenseMap<unsigned, int> IntervalSSMap; - - // Def2SpillMap - A map from a def instruction index to spill index.
- DenseMap<SlotIndex, SlotIndex> Def2SpillMap; - - public: - static char ID; - PreAllocSplitting() : MachineFunctionPass(ID) { - initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); - AU.addPreserved<RegisterCoalescer>(); - AU.addPreserved<CalculateSpillWeights>(); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<VirtRegMap>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual void releaseMemory() { - IntervalSSMap.clear(); - Def2SpillMap.clear(); - } - - virtual const char *getPassName() const { - return "Pre-Register Allocation Live Interval Splitting"; - } - - /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* M = 0) const { - LIs->print(O, M); - } - - - private: - - MachineBasicBlock::iterator - findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&); - - MachineBasicBlock::iterator - findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex, - SmallPtrSet<MachineInstr*, 4>&); - - int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); - - bool IsAvailableInStack(MachineBasicBlock*, unsigned, - SlotIndex, SlotIndex, - SlotIndex&, int&) const; - - void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex); - - bool SplitRegLiveInterval(LiveInterval*); - - bool SplitRegLiveIntervals(const TargetRegisterClass **, - SmallPtrSet<LiveInterval*, 8>&); - - bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB, - MachineBasicBlock* BarrierMBB); - bool Rematerialize(unsigned vreg, VNInfo* ValNo, - MachineInstr* DefMI, - MachineBasicBlock::iterator RestorePt, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, - MachineInstr* DefMI, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int& SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - MachineInstr* FoldRestore(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - void RenumberValno(VNInfo* VN); - void ReconstructLiveInterval(LiveInterval* LI); - bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split); - unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs, - unsigned Reg, int FrameIndex, bool& TwoAddr); - VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock); - VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, -
DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock); -}; -} // end anonymous namespace - -char PreAllocSplitting::ID = 0; - -INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting", - "Pre-Register Allocation Live Interval Splitting", - false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting", - "Pre-Register Allocation Live Interval Splitting", - false, false) - -char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; - -/// findSpillPoint - Find a gap as far away from the given MI that's suitable -/// for spilling the current live interval. The index must be before any -/// defs and uses of the live interval register in the mbb. Return begin() if -/// none is found. -MachineBasicBlock::iterator -PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, - MachineInstr *DefMI, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { - MachineBasicBlock::iterator Pt = MBB->begin(); - - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock::iterator EndPt = DefMI - ? MachineBasicBlock::iterator(DefMI) : MBB->begin(); - - while (MII != EndPt && !RefsInMBB.count(MII) && - MII->getOpcode() != TRI->getCallFrameSetupOpcode()) - --MII; - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - - while (MII != EndPt && !RefsInMBB.count(MII)) { - // We can't insert the spill between the barrier (a call), and its - // corresponding call frame setup. - if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { - while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) { - --MII; - if (MII == EndPt) { - return Pt; - } - } - continue; - } else { - Pt = MII; - } - - if (RefsInMBB.count(MII)) - return Pt; - - - --MII; - } - - return Pt; -} - -/// findRestorePoint - Find a gap in the instruction index map that's suitable -/// for restoring the current live interval value. The index must be before any -/// uses of the live interval register in the mbb. Return end() if none is -/// found. -MachineBasicBlock::iterator -PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, - SlotIndex LastIdx, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { - // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb - // begin index accordingly. - MachineBasicBlock::iterator Pt = MBB->end(); - MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator(); - - // We start at the call, so walk forward until we find the call frame teardown - // since we can't insert restores before that. Bail if we encounter a use - // during this time. - MachineBasicBlock::iterator MII = MI; - if (MII == EndPt) return Pt; - - while (MII != EndPt && !RefsInMBB.count(MII) && - MII->getOpcode() != TRI->getCallFrameDestroyOpcode()) - ++MII; - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - ++MII; - - // FIXME: Limit the number of instructions to examine to reduce - // compile time? 
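The loop that follows completes findRestorePoint: walk forward from the barrier call, never proposing a point inside a call-frame setup/teardown bracket, and bail at the first reference to the register. A standalone model of that walk over an invented opcode stream (the Op markers are not real target opcodes):

#include <cstdio>

enum Op { Normal, FrameSetup, FrameDestroy, Use };

static int findRestorePoint(const Op *MIs, int N) {
  int Pt = -1;                          // -1 plays the role of MBB->end()
  for (int i = 0; i < N; ++i) {
    if (MIs[i] == Use) return Pt;       // first reference: stop looking
    if (MIs[i] == FrameSetup) {         // skip the whole bracketed region
      do {
        if (++i == N || MIs[i] == Use) return Pt;
      } while (MIs[i] != FrameDestroy);
    } else if (MIs[i] != FrameDestroy) {
      Pt = i;                           // last legal gap seen so far
    }
  }
  return Pt;
}

int main() {
  Op MBB[] = { Normal, FrameSetup, Normal, FrameDestroy, Normal, Use };
  printf("restore point: index %d\n", findRestorePoint(MBB, 6));
  return 0;
}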
- while (MII != EndPt) { - SlotIndex Index = LIs->getInstructionIndex(MII); - if (Index > LastIdx) - break; - - // We can't insert a restore between the barrier (a call) and its - // corresponding call frame teardown. - if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) { - do { - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - ++MII; - } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else { - Pt = MII; - } - - if (RefsInMBB.count(MII)) - return Pt; - - ++MII; - } - - return Pt; -} - -/// CreateSpillStackSlot - Create a stack slot for the live interval being -/// split. If the live interval was previously split, just reuse the same -/// slot. -int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, - const TargetRegisterClass *RC) { - int SS; - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); - if (I != IntervalSSMap.end()) { - SS = I->second; - } else { - SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - IntervalSSMap[Reg] = SS; - } - - // Create live interval for stack slot. - CurrSLI = &LSs->getOrCreateInterval(SS, RC); - if (CurrSLI->hasAtLeastOneValue()) - CurrSValNo = CurrSLI->getValNumInfo(0); - else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, - LSs->getVNInfoAllocator()); - return SS; -} - -/// IsAvailableInStack - Return true if register is available in a split stack -/// slot at the specified index. -bool -PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, - unsigned Reg, SlotIndex DefIndex, - SlotIndex RestoreIndex, - SlotIndex &SpillIndex, - int& SS) const { - if (!DefMBB) - return false; - - DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg); - if (I == IntervalSSMap.end()) - return false; - DenseMap<SlotIndex, SlotIndex>::const_iterator - II = Def2SpillMap.find(DefIndex); - if (II == Def2SpillMap.end()) - return false; - - // If last spill of def is in the same mbb as barrier mbb (where restore will - // be), make sure it's not below the intended restore index. - // FIXME: Undo the previous spill? - assert(LIs->getMBBFromIndex(II->second) == DefMBB); - if (DefMBB == BarrierMBB && II->second >= RestoreIndex) - return false; - - SS = I->second; - SpillIndex = II->second; - return true; -} - -/// UpdateSpillSlotInterval - Given the specified val# of the register live -/// interval being split, and the spill and restore indices, update the live -/// interval of the spill stack slot. -void -PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex, - SlotIndex RestoreIndex) { - assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB && - "Expect restore in the barrier mbb"); - - MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex); - if (MBB == BarrierMBB) { - // Intra-block spill + restore. We are done. - LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo); - CurrSLI->addRange(SLR); - return; - } - - SmallPtrSet<MachineBasicBlock*, 4> Processed; - SlotIndex EndIdx = LIs->getMBBEndIdx(MBB); - LiveRange SLR(SpillIndex, EndIdx, CurrSValNo); - CurrSLI->addRange(SLR); - Processed.insert(MBB); - - // Start from the spill mbb, figure out the extent of the spill slot's - // live interval. - SmallVector<MachineBasicBlock*, 4> WorkList; - const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex); - if (LR->end > EndIdx) - // If live range extends beyond end of mbb, add successors to work list.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - WorkList.push_back(*SI); - - while (!WorkList.empty()) { - MachineBasicBlock *MBB = WorkList.back(); - WorkList.pop_back(); - if (Processed.count(MBB)) - continue; - SlotIndex Idx = LIs->getMBBStartIdx(MBB); - LR = CurrLI->getLiveRangeContaining(Idx); - if (LR && LR->valno == ValNo) { - EndIdx = LIs->getMBBEndIdx(MBB); - if (Idx <= RestoreIndex && RestoreIndex < EndIdx) { - // Spill slot live interval stops at the restore. - LiveRange SLR(Idx, RestoreIndex, CurrSValNo); - CurrSLI->addRange(SLR); - } else if (LR->end > EndIdx) { - // Live range extends beyond end of mbb, process successors. - LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo); - CurrSLI->addRange(SLR); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - WorkList.push_back(*SI); - } else { - LiveRange SLR(Idx, LR->end, CurrSValNo); - CurrSLI->addRange(SLR); - } - Processed.insert(MBB); - } - } -} - -/// PerformPHIConstruction - From properly set up use and def lists, use a PHI -/// construction algorithm to compute the ranges and valnos for an interval. -VNInfo* -PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock) { - // Return memoized result if it's available. - if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI)) - return NewVNs[UseI]; - else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI)) - return NewVNs[UseI]; - else if (!IsIntraBlock && LiveOut.count(MBB)) - return LiveOut[MBB]; - - // Check if our block contains any uses or defs. - bool ContainsDefs = Defs.count(MBB); - bool ContainsUses = Uses.count(MBB); - - VNInfo* RetVNI = 0; - - // Enumerate the cases of use/def containing blocks. - if (!ContainsDefs && !ContainsUses) { - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - } else if (ContainsDefs && !ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; - - // Search for the def in this block. If we don't find it before the - // instruction we care about, go to the fallback case. Note that that - // should never happen: this cannot be intrablock, so use should - // always be an end() iterator. - assert(UseI == MBB->end() && "No use marked in intrablock"); - - MachineBasicBlock::iterator Walker = UseI; - --Walker; - while (Walker != MBB->begin()) { - if (BlockDefs.count(Walker)) - break; - --Walker; - } - - // Once we've found it, extend its VNInfo to our instruction. - SlotIndex DefIndex = LIs->getInstructionIndex(Walker); - DefIndex = DefIndex.getDefIndex(); - SlotIndex EndIndex = LIs->getMBBEndIdx(MBB); - - RetVNI = NewVNs[Walker]; - LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI)); - } else if (!ContainsDefs && ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; - - // Search for the use in this block that precedes the instruction we care - // about, going to the fallback case if we don't find it.
- MachineBasicBlock::iterator Walker = UseI; - bool found = false; - while (Walker != MBB->begin()) { - --Walker; - if (BlockUses.count(Walker)) { - found = true; - break; - } - } - - if (!found) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - - SlotIndex UseIndex = LIs->getInstructionIndex(Walker); - UseIndex = UseIndex.getUseIndex(); - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - - // Now, recursively phi construct the VNInfo for the use we found, - // and then extend it to include the instruction we care about. - RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, false, true); - - LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI)); - - // FIXME: Need to set kills properly for inter-block stuff. - } else if (ContainsDefs && ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; - SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; - - // This case is basically a merging of the two preceding cases, with the - // special note that checking for defs must take precedence over checking - // for uses, because of two-address instructions. - MachineBasicBlock::iterator Walker = UseI; - bool foundDef = false; - bool foundUse = false; - while (Walker != MBB->begin()) { - --Walker; - if (BlockDefs.count(Walker)) { - foundDef = true; - break; - } else if (BlockUses.count(Walker)) { - foundUse = true; - break; - } - } - - if (!foundDef && !foundUse) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - - SlotIndex StartIndex = LIs->getInstructionIndex(Walker); - StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex(); - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - - if (foundDef) - RetVNI = NewVNs[Walker]; - else - RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, false, true); - - LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - // Memoize results so we don't have to recompute them. - if (!IsIntraBlock) LiveOut[MBB] = RetVNI; - else { - if (!NewVNs.count(UseI)) - NewVNs[UseI] = RetVNI; - Visited.insert(UseI); - } - - return RetVNI; -} - -/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path. -/// -VNInfo* -PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock) { - // NOTE: Because this is the fallback case from other cases, we do NOT - // assume that we are not intrablock here.
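The fallback body that follows memoizes its placeholder value into Phis[MBB] before recursing into predecessors; inserting before recursing is what keeps the traversal from looping on cyclic CFGs. A toy reduction of that memoize-before-recurse pattern on a made-up graph (Block, valueAt, and the value numbers are invented):

#include <cstdio>
#include <map>
#include <vector>

struct Block { std::vector<int> Preds; };

static std::map<int, int> Phis;          // block id -> placeholder value
static int NextVal = 0;

static int valueAt(const std::vector<Block> &CFG, int BB) {
  std::map<int, int>::iterator I = Phis.find(BB);
  if (I != Phis.end()) return I->second; // memoized hit also breaks cycles
  int VN = Phis[BB] = NextVal++;         // record before recursing
  for (size_t i = 0; i != CFG[BB].Preds.size(); ++i)
    valueAt(CFG, CFG[BB].Preds[i]);      // pull values from predecessors
  return VN;
}

int main() {
  std::vector<Block> CFG(3);
  CFG[1].Preds.push_back(0);
  CFG[1].Preds.push_back(2);             // blocks 1 and 2 form a loop
  CFG[2].Preds.push_back(1);
  printf("value at block 2: %d\n", valueAt(CFG, 2));
  return 0;
}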
- if (Phis.count(MBB)) return Phis[MBB]; - - SlotIndex StartIndex = LIs->getMBBStartIdx(MBB); - VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(SlotIndex(), /*FIXME*/ 0, - LIs->getVNInfoAllocator()); - - if (!IsIntraBlock) LiveOut[MBB] = RetVNI; - - // If there are no uses or defs between our starting point and the - // beginning of the block, then recursively perform phi construction - // on our predecessors. - DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI, - Visited, Defs, Uses, NewVNs, - LiveOut, Phis, false, false); - if (Incoming != 0) - IncomingVNs[*PI] = Incoming; - } - - if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) { - VNInfo* OldVN = RetVNI; - VNInfo* NewVN = IncomingVNs.begin()->second; - VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN); - if (MergedVN == OldVN) std::swap(OldVN, NewVN); - - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(), - LOE = LiveOut.end(); LOI != LOE; ++LOI) - if (LOI->second == OldVN) - LOI->second = MergedVN; - for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(), - NVE = NewVNs.end(); NVI != NVE; ++NVI) - if (NVI->second == OldVN) - NVI->second = MergedVN; - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(), - PE = Phis.end(); PI != PE; ++PI) - if (PI->second == OldVN) - PI->second = MergedVN; - RetVNI = MergedVN; - } else { - // Otherwise, merge the incoming VNInfos with a phi join. Create a new - // VNInfo to represent the joined value. - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = - IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { - I->second->setHasPHIKill(true); - } - } - - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - // Memoize results so we don't have to recompute them. - if (!IsIntraBlock) - LiveOut[MBB] = RetVNI; - else { - if (!NewVNs.count(UseI)) - NewVNs[UseI] = RetVNI; - Visited.insert(UseI); - } - - return RetVNI; -} - -/// ReconstructLiveInterval - Recompute a live interval from scratch. -void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { - VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator(); - - // Clear the old ranges and valnos. - LI->clear(); - - // Cache the uses and defs of the register - typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap; - RegMap Defs, Uses; - - // Keep track of the new VNs we're creating. - DenseMap<MachineInstr*, VNInfo*> NewVNs; - SmallPtrSet<VNInfo*, 2> PhiVNs; - - // Cache defs, and create a new VNInfo for each def. - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - Defs[(*DI).getParent()].insert(&*DI); - - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = DefIdx.getDefIndex(); - - assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); - - // If the def is a move, set the copy field. - if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) - NewVN->setCopy(&*DI); - - NewVNs[&*DI] = NewVN; - } - - // Cache uses as a separate pass from actually processing them.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), - UE = MRI->use_end(); UI != UE; ++UI) - Uses[(*UI).getParent()].insert(&*UI); - - // Now, actually process every use and use a phi construction algorithm - // to walk from it to its reaching definitions, building VNInfos along - // the way. - DenseMap<MachineBasicBlock*, VNInfo*> LiveOut; - DenseMap<MachineBasicBlock*, VNInfo*> Phis; - SmallPtrSet<MachineInstr*, 4> Visited; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), - UE = MRI->use_end(); UI != UE; ++UI) { - PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, true, true); - } - - // Add ranges for dead defs - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = DefIdx.getDefIndex(); - - if (LI->liveAt(DefIdx)) continue; - - VNInfo* DeadVN = NewVNs[&*DI]; - LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); - } -} - -/// RenumberValno - Split the given valno out into a new vreg, allowing it to -/// be allocated to a different register. This function creates a new vreg, -/// copies the valno and its live ranges over to the new vreg's interval, -/// removes them from the old interval, and rewrites all uses and defs of -/// the original reg to the new vreg within those ranges. -void PreAllocSplitting::RenumberValno(VNInfo* VN) { - SmallVector<VNInfo*, 4> Stack; - SmallVector<VNInfo*, 4> VNsToCopy; - Stack.push_back(VN); - - // Walk through and copy the valno we care about, and any other valnos - // that are two-address redefinitions of the one we care about. These - // will need to be rewritten as well. We also check for safety of the - // renumbering here, by making sure that none of the valnos involved has - // phi kills. - while (!Stack.empty()) { - VNInfo* OldVN = Stack.back(); - Stack.pop_back(); - - // Bail out if we ever encounter a valno that has a PHI kill. We can't - // renumber these. - if (OldVN->hasPHIKill()) return; - - VNsToCopy.push_back(OldVN); - - // Locate two-address redefinitions - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); - VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); - if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != - VNsToCopy.end()) - Stack.push_back(NextVN); - } - } - - // Create the new vreg - unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg)); - - // Create the new live interval - LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg); - - for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE = - VNsToCopy.end(); OI != OE; ++OI) { - VNInfo* OldVN = *OI; - - // Copy the valno over - VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator()); - NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN); - - // Remove the valno from the old interval - CurrLI->removeValNo(OldVN); - } - - // Rewrite defs and uses. This is done in two stages to avoid invalidating - // the reg_iterator.
- SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange; - - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand& MO = I.getOperand(); - SlotIndex InstrIdx = LIs->getInstructionIndex(&*I); - - if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) || - (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex()))) - OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); - } - - for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I = - OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) { - MachineInstr* Inst = I->first; - unsigned OpIdx = I->second; - MachineOperand& MO = Inst->getOperand(OpIdx); - MO.setReg(NewVReg); - } - - // Grow the VirtRegMap, since we've created a new vreg. - VRM->grow(); - - // The renumbered vreg shares a stack slot with the old register. - if (IntervalSSMap.count(CurrLI->reg)) - IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; - - ++NumRenumbers; -} - -bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, - MachineInstr* DefMI, - MachineBasicBlock::iterator RestorePt, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - MachineBasicBlock& MBB = *RestorePt->getParent(); - - MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - if (!DefMI || DefMI->getParent() == BarrierMBB) - KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); - else - KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); - - if (KillPt == DefMI->getParent()->end()) - return false; - - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); - SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); - - ReconstructLiveInterval(CurrLI); - RematIdx = RematIdx.getDefIndex(); - RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); - - ++NumSplits; - ++NumRemats; - return true; -} - -MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* DefMI, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int& SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - // Go top down if RefsInMBB is empty. - if (RefsInMBB.empty()) - return 0; - - MachineBasicBlock::iterator FoldPt = Barrier; - while (&*FoldPt != DefMI && FoldPt != MBB->begin() && - !RefsInMBB.count(FoldPt)) - --FoldPt; - - int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg); - if (OpIdx == -1) - return 0; - - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - - if (!TII->canFoldMemoryOperand(FoldPt, Ops)) - return 0; - - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg); - if (I != IntervalSSMap.end()) { - SS = I->second; - } else { - SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - } - - MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); - - if (FMI) { - LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FoldPt->eraseFromParent(); - ++NumFolds; - - IntervalSSMap[vreg] = SS; - CurrSLI = &LSs->getOrCreateInterval(SS, RC); - if (CurrSLI->hasAtLeastOneValue()) - CurrSValNo = CurrSLI->getValNumInfo(0); - else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, - LSs->getVNInfoAllocator()); - } - - return FMI; -} - -MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds) - return 0; - - // Go top down if RefsInMBB is empty. 
- if (RefsInMBB.empty()) - return 0; - - // Can't fold a restore between a call stack setup and teardown. - MachineBasicBlock::iterator FoldPt = Barrier; - - // Advance from barrier to call frame teardown. - while (FoldPt != MBB->getFirstTerminator() && - FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { - if (RefsInMBB.count(FoldPt)) - return 0; - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - else - ++FoldPt; - - // Now find the restore point. - while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) { - if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) { - while (FoldPt != MBB->getFirstTerminator() && - FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { - if (RefsInMBB.count(FoldPt)) - return 0; - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - } - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - - int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true); - if (OpIdx == -1) - return 0; - - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - - if (!TII->canFoldMemoryOperand(FoldPt, Ops)) - return 0; - - MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); - - if (FMI) { - LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FoldPt->eraseFromParent(); - ++NumRestoreFolds; - } - - return FMI; -} - -/// SplitRegLiveInterval - Split (spill and restore) the given live interval -/// so it would not cross the barrier that's being processed. Shrink wrap -/// (minimize) the live interval to the last uses. -bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { - DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier - << " result: "); - - CurrLI = LI; - - // Find live range where current interval crosses the barrier. - LiveInterval::iterator LR = - CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex()); - VNInfo *ValNo = LR->valno; - - assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); - - MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def); - - // If this would create a new join point, do not split. - if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { - DEBUG(dbgs() << "FAILED (would create a new join point).\n"); - return false; - } - - // Find all references in the barrier mbb. - SmallPtrSet<MachineInstr*, 4> RefsInMBB; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), - E = MRI->reg_end(); I != E; ++I) { - MachineInstr *RefMI = &*I; - if (RefMI->getParent() == BarrierMBB) - RefsInMBB.insert(RefMI); - } - - // Find a point to restore the value after the barrier. - MachineBasicBlock::iterator RestorePt = - findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); - if (RestorePt == BarrierMBB->end()) { - DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n"); - return false; - } - - if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) - if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { - DEBUG(dbgs() << "success (remat).\n"); - return true; - } - - // Add a spill either before the barrier or after the definition. - MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; - const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); - SlotIndex SpillIndex; - MachineInstr *SpillMI = NULL; - int SS = -1; - if (!DefMI) { - // If we don't know where the def is we must split just before the barrier.
- if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, - BarrierMBB, SS, RefsInMBB))) { - SpillIndex = LIs->getInstructionIndex(SpillMI); - } else { - MachineBasicBlock::iterator SpillPt = - findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); - if (SpillPt == BarrierMBB->begin()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - // Add spill. - - SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC, - TRI); - SpillMI = prior(SpillPt); - SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); - } - } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, - LIs->getZeroIndex(), SpillIndex, SS)) { - // If it's already split, just restore the value. There is no need to spill - // the def again. - if (!DefMI) { - DEBUG(dbgs() << "FAILED (def is dead).\n"); - return false; // Def is dead. Do nothing. - } - - if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, - BarrierMBB, SS, RefsInMBB))) { - SpillIndex = LIs->getInstructionIndex(SpillMI); - } else { - // Check if it's possible to insert a spill after the def MI. - MachineBasicBlock::iterator SpillPt; - if (DefMBB == BarrierMBB) { - // Add spill after the def and the last use before the barrier. - SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, - RefsInMBB); - if (SpillPt == DefMBB->begin()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - } else { - SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); - if (SpillPt == DefMBB->end()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - } - // Add spill. - SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC, - TRI); - SpillMI = prior(SpillPt); - SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); - } - } - - // Remember def instruction index to spill index mapping. - if (DefMI && SpillMI) - Def2SpillMap[ValNo->def] = SpillIndex; - - // Add restore. - bool FoldedRestore = false; - SlotIndex RestoreIndex; - if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, - BarrierMBB, SS, RefsInMBB)) { - RestorePt = LMI; - RestoreIndex = LIs->getInstructionIndex(RestorePt); - FoldedRestore = true; - } else { - TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI); - MachineInstr *LoadMI = prior(RestorePt); - RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); - } - - // Update spill stack slot live interval. - UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(), - RestoreIndex.getDefIndex()); - - ReconstructLiveInterval(CurrLI); - - if (!FoldedRestore) { - SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); - RestoreIdx = RestoreIdx.getDefIndex(); - RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); - } - - ++NumSplits; - DEBUG(dbgs() << "success.\n"); - return true; -} - -/// SplitRegLiveIntervals - Split all register live intervals that cross the -/// barrier that's being processed. -bool -PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, - SmallPtrSet<LiveInterval*, 8>& Split) { - // First find all the virtual registers whose live intervals are intercepted - // by the current barrier. 
- SmallVector<LiveInterval*, 8> Intervals; - for (const TargetRegisterClass **RC = RCs; *RC; ++RC) { - // FIXME: If it's not safe to move any instruction that defines the barrier - // register class, then it means there are some special dependencies which - // codegen is not modelling. Ignore these barriers for now. - if (!TII->isSafeToMoveRegClassDefs(*RC)) - continue; - const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); - for (unsigned i = 0, e = VRs.size(); i != e; ++i) { - unsigned Reg = VRs[i]; - if (!LIs->hasInterval(Reg)) - continue; - LiveInterval *LI = &LIs->getInterval(Reg); - if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg)) - // Virtual register live interval is intercepted by the barrier. We - // should split and shrink wrap its interval if possible. - Intervals.push_back(LI); - } - } - - // Process the affected live intervals. - bool Change = false; - while (!Intervals.empty()) { - if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) - break; - LiveInterval *LI = Intervals.back(); - Intervals.pop_back(); - bool result = SplitRegLiveInterval(LI); - if (result) Split.insert(LI); - Change |= result; - } - - return Change; -} - -unsigned PreAllocSplitting::getNumberOfNonSpills( - SmallPtrSet<MachineInstr*, 4>& MIs, - unsigned Reg, int FrameIndex, - bool& FeedsTwoAddr) { - unsigned NonSpills = 0; - for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end(); - UI != UE; ++UI) { - int StoreFrameIndex; - unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) - ++NonSpills; - - int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) - FeedsTwoAddr = true; - } - - return NonSpills; -} - -/// removeDeadSpills - After doing splitting, filter through all intervals we've -/// split, and see if any of the spills are unnecessary. If so, remove them. -bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { - bool changed = false; - - // Walk over all of the live intervals that were touched by the splitter, - // and see if we can do any DCE and/or folding. - for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(), - LE = split.end(); LI != LE; ++LI) { - DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount; - - // First, collect all the uses of the vreg, and sort them by their - // reaching definition (VNInfo). - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), - UE = MRI->use_end(); UI != UE; ++UI) { - SlotIndex index = LIs->getInstructionIndex(&*UI); - index = index.getUseIndex(); - - const LiveRange* LR = (*LI)->getLiveRangeContaining(index); - VNUseCount[LR->valno].insert(&*UI); - } - - // Now, take the definitions (VNInfo's) one at a time and try to DCE - // and/or fold them away. - for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(), - VE = (*LI)->vni_end(); VI != VE; ++VI) { - - if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit) - return changed; - - VNInfo* CurrVN = *VI; - - // We don't currently try to handle definitions with PHI kills, because - // it would involve processing more than one VNInfo at once. - if (CurrVN->hasPHIKill()) continue; - - // We also don't try to handle the results of PHI joins, since there's - // no defining instruction to analyze. 
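The triage that follows counts, for each reloaded value, the uses that are not stores straight back to the same stack slot: zero such uses means a dead load/store pair, exactly one (not feeding a two-address def) is a fold candidate. A compact restatement with invented use records standing in for the isStoreToStackSlot and tied-operand checks done by getNumberOfNonSpills above:

#include <cstdio>

struct UseInfo { bool StoresToSameSlot; bool IsTwoAddress; };

static unsigned countNonSpills(const UseInfo *Uses, unsigned NumUses,
                               bool &FeedsTwoAddr) {
  unsigned NonSpills = 0;
  for (unsigned i = 0; i != NumUses; ++i) {
    if (!Uses[i].StoresToSameSlot)
      ++NonSpills;
    if (Uses[i].IsTwoAddress)
      FeedsTwoAddr = true;
  }
  return NonSpills;
}

int main() {
  UseInfo Uses[] = { { true, false }, { false, false } };
  bool FeedsTwoAddr = false;
  // 0 -> dead load/store pair; 1 and !FeedsTwoAddr -> try to fold the reload.
  printf("non-spill uses: %u\n", countNonSpills(Uses, 2, FeedsTwoAddr));
  return 0;
}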
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
- if (!DefMI || CurrVN->isUnused()) continue;
-
- // We're only interested in eliminating cruft introduced by the splitter,
- // which is of the form load-use or load-use-store. First, check that the
- // definition is a load, and remember what stack slot we loaded it from.
- int FrameIndex;
- if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
-
- // If the definition has no uses at all, just DCE it.
- if (VNUseCount[CurrVN].size() == 0) {
- LIs->RemoveMachineInstrFromMaps(DefMI);
- (*LI)->removeValNo(CurrVN);
- DefMI->eraseFromParent();
- VNUseCount.erase(CurrVN);
- ++NumDeadSpills;
- changed = true;
- continue;
- }
-
- // Second, get the number of non-store uses of the definition, as well as
- // a flag indicating whether it feeds into a later two-address definition.
- bool FeedsTwoAddr = false;
- unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
- (*LI)->reg, FrameIndex,
- FeedsTwoAddr);
-
- // If there's one non-store use and it doesn't feed a two-addr, then
- // this is a load-use-store case that we can try to fold.
- if (NonSpillCount == 1 && !FeedsTwoAddr) {
- // Start by finding the non-store use MachineInstr.
- SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
- int StoreFrameIndex;
- unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
- while (UI != VNUseCount[CurrVN].end() &&
- (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
- ++UI;
- if (UI != VNUseCount[CurrVN].end())
- StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
- }
- if (UI == VNUseCount[CurrVN].end()) continue;
-
- MachineInstr* use = *UI;
-
- // Attempt to fold it away!
- int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
- if (OpIdx == -1) continue;
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(OpIdx);
- if (!TII->canFoldMemoryOperand(use, Ops)) continue;
-
- MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
-
- if (!NewMI) continue;
-
- // Update relevant analyses.
- LIs->RemoveMachineInstrFromMaps(DefMI);
- LIs->ReplaceMachineInstrInMaps(use, NewMI);
- (*LI)->removeValNo(CurrVN);
-
- DefMI->eraseFromParent();
- use->eraseFromParent();
- VNUseCount[CurrVN].erase(use);
-
- // Remove deleted instructions. Note that we need to remove them from
- // the VNInfo->use map as well, just to be safe.
- for (SmallPtrSet<MachineInstr*, 4>::iterator II =
- VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
- II != IE; ++II) {
- for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
- VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
- ++VNI)
- if (VNI->first != CurrVN)
- VNI->second.erase(*II);
- LIs->RemoveMachineInstrFromMaps(*II);
- (*II)->eraseFromParent();
- }
-
- VNUseCount.erase(CurrVN);
-
- for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
- VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
- if (VI->second.erase(use))
- VI->second.insert(NewMI);
-
- ++NumDeadSpills;
- changed = true;
- continue;
- }
-
- // If there's more than one non-store instruction, we can't profitably
- // fold it, so bail.
- if (NonSpillCount) continue;
-
- // Otherwise, this is a load-store case, so DCE them.
- for (SmallPtrSet<MachineInstr*, 4>::iterator UI = - VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end(); - UI != UE; ++UI) { - LIs->RemoveMachineInstrFromMaps(*UI); - (*UI)->eraseFromParent(); - } - - VNUseCount.erase(CurrVN); - - LIs->RemoveMachineInstrFromMaps(DefMI); - (*LI)->removeValNo(CurrVN); - DefMI->eraseFromParent(); - ++NumDeadSpills; - changed = true; - } - } - - return changed; -} - -bool PreAllocSplitting::createsNewJoin(LiveRange* LR, - MachineBasicBlock* DefMBB, - MachineBasicBlock* BarrierMBB) { - if (DefMBB == BarrierMBB) - return false; - - if (LR->valno->hasPHIKill()) - return false; - - SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); - if (LR->end < MBBEnd) - return false; - - MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>(); - if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB)) - return true; - - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - SmallPtrSet<MachineBasicBlock*, 4> Visited; - typedef std::pair<MachineBasicBlock*, - MachineBasicBlock::succ_iterator> ItPair; - SmallVector<ItPair, 4> Stack; - Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin())); - - while (!Stack.empty()) { - ItPair P = Stack.back(); - Stack.pop_back(); - - MachineBasicBlock* PredMBB = P.first; - MachineBasicBlock::succ_iterator S = P.second; - - if (S == PredMBB->succ_end()) - continue; - else if (Visited.count(*S)) { - Stack.push_back(std::make_pair(PredMBB, ++S)); - continue; - } else - Stack.push_back(std::make_pair(PredMBB, S+1)); - - MachineBasicBlock* MBB = *S; - Visited.insert(MBB); - - if (MBB == BarrierMBB) - return true; - - MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB); - MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB); - MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom(); - while (MDTN) { - if (MDTN == DefMDTN) - return true; - else if (MDTN == BarrierMDTN) - break; - MDTN = MDTN->getIDom(); - } - - MBBEnd = LIs->getMBBEndIdx(MBB); - if (LR->end > MBBEnd) - Stack.push_back(std::make_pair(MBB, MBB->succ_begin())); - } - - return false; -} - - -bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) { - CurrMF = &MF; - TM = &MF.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); - MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); - SIs = &getAnalysis<SlotIndexes>(); - LIs = &getAnalysis<LiveIntervals>(); - LSs = &getAnalysis<LiveStacks>(); - VRM = &getAnalysis<VirtRegMap>(); - - bool MadeChange = false; - - // Make sure blocks are numbered in order. 
- MF.RenumberBlocks(); - - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet<MachineBasicBlock*,16> Visited; - - SmallPtrSet<LiveInterval*, 8> Split; - - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - BarrierMBB = *DFI; - for (MachineBasicBlock::iterator I = BarrierMBB->begin(), - E = BarrierMBB->end(); I != E; ++I) { - Barrier = &*I; - const TargetRegisterClass **BarrierRCs = - Barrier->getDesc().getRegClassBarriers(); - if (!BarrierRCs) - continue; - BarrierIdx = LIs->getInstructionIndex(Barrier); - MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split); - } - } - - MadeChange |= removeDeadSpills(Split); - - return MadeChange; -} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index f1f3c99..a901c5f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -145,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -152,8 +153,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode - int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); - int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); // Early exit for targets which have no call frame setup/destroy pseudo // instructions. 
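A minimal sketch of the caller-side pattern that the two PrologEpilogInserter hunks in this file converge on, assuming only the interfaces visible in this revision; the helper name isCallFramePseudo is illustrative and not part of the import:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"

// Recognize call frame setup/destroy pseudos via TargetInstrInfo, the
// interface these hunks migrate to from TargetRegisterInfo.
// Note: isCallFramePseudo is a hypothetical helper, not part of the patch.
static bool isCallFramePseudo(const llvm::MachineInstr &MI,
                              const llvm::TargetInstrInfo &TII) {
  // Targets without these pseudos report an opcode of -1, which can never
  // match a real instruction opcode.
  int Opc = MI.getOpcode();
  return Opc == TII.getCallFrameSetupOpcode() ||
         Opc == TII.getCallFrameDestroyOpcode();
}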
@@ -705,12 +706,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 1d77b29..5ea26ad 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -20,6 +20,7 @@ #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -34,7 +35,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -141,7 +141,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -324,19 +324,21 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!" - "\nCannot allocate: " << *VirtReg; + const char *Msg = "ran out of registers during register allocation"; + // Probably caused by an inline asm. + MachineInstr *MI; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - MachineInstr *MI = I.skipInstruction();) { - if (!MI->isInlineAsm()) - continue; - Msg << "\nPlease check your inline asm statement for " - "invalid constraints:\n"; - MI->print(Msg, &VRM->getMachineFunction().getTarget()); - } - report_fatal_error(Msg.str()); + (MI = I.skipInstruction());) + if (MI->isInlineAsm()) + break; + if (MI) + MI->emitError(Msg); + else + report_fatal_error(Msg); + // Keep going after reporting the error. + VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; } if (AvailablePhysReg) diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 9765203..b36a445 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -86,7 +86,7 @@ namespace { // that is currently available in a physical register. 
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
// RegState - Track the state of a physical register.
enum RegState {
@@ -118,7 +118,7 @@ namespace {
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
// mark all the clobbered registers as used by the function.
- SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
// completely after spilling all live registers. LiveRegMap entries should
@@ -272,7 +272,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// If this register is used by DBG_VALUE then insert new DBG_VALUE to
// identify spilled location as the place to find corresponding variable's
// value.
- if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
+ SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
const MDNode *MDPtr =
DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
int64_t Offset = 0;
@@ -291,9 +293,11 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = DBG->getParent();
MBB->insert(MI, NewDV);
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
- LiveDbgValueMap[LRI->first] = NewDV;
}
}
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to it, because they all point to the spilled value now.
+ LRIDbgValues.clear();
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
}
@@ -419,7 +423,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
if (UsedInInstr.test(PhysReg)) {
- DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n");
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -428,15 +432,15 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
case regFree:
return 0;
case regReserved:
- DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: "
- << PhysReg << " is reserved already.\n");
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default:
return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
}
// This is a disabled register, add up cost of aliases.
- DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n");
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
for (const unsigned *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
@@ -487,14 +491,12 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// Take hint when possible.
if (Hint) {
- switch(calcSpillCost(Hint)) {
- default:
- definePhysReg(MI, Hint, regFree);
- // Fall through.
- case 0:
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint); + if (Cost < spillDirty) { + if (Cost) + definePhysReg(MI, Hint, regFree); return assignVirtToPhysReg(LRE, Hint); - case spillImpossible: - break; } } @@ -513,7 +515,7 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned Cost = calcSpillCost(*I); - DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. @@ -528,16 +530,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { return assignVirtToPhysReg(LRE, BestReg); } - // Nothing we can do. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for " - << "invalid constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); + // Nothing we can do. Report an error and keep going with a bad allocation. + MI->emitError("ran out of registers during register allocation"); + definePhysReg(MI, *AO.begin(), regFree); + assignVirtToPhysReg(LRE, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. @@ -724,7 +720,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); + DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) + << " as used in instr\n"); UsedInInstr.set(Reg); } @@ -774,7 +771,7 @@ void RAFast::AllocateBasicBlock() { // Otherwise, sequentially allocate each instruction in the MBB. while (MII != MBB->end()) { MachineInstr *MI = MII++; - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); DEBUG({ dbgs() << "\n>> " << *MI << "Regs:"; for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { @@ -818,7 +815,7 @@ void RAFast::AllocateBasicBlock() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveDbgValueMap[Reg] = MI; + LiveDbgValueMap[Reg].push_back(MI); LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); @@ -887,7 +884,7 @@ void RAFast::AllocateBasicBlock() { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || - TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; } else { if (MO.isEarlyClobber()) hasEarlyClobbers = true; @@ -917,7 +914,7 @@ void RAFast::AllocateBasicBlock() { // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { + (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. CopyDst = 0; @@ -962,7 +959,7 @@ void RAFast::AllocateBasicBlock() { } unsigned DefOpEnd = MI->getNumOperands(); - if (TID.isCall()) { + if (MCID.isCall()) { // Spill all virtregs before a call. 
This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. we don't have to wade through @@ -973,7 +970,7 @@ void RAFast::AllocateBasicBlock() { // The imp-defs are skipped below, but we still need to mark those // registers as used by the function. - SkippedInstrs.insert(&TID); + SkippedInstrs.insert(&MCID); } // Third scan. @@ -1059,7 +1056,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { MRI->closePhysRegsUsed(*TRI); // Add the clobber lists for all the instructions we skipped earlier. - for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator + for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) if (const unsigned *Defs = (*I)->getImplicitDefs()) while (*Defs) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 8d06325..e235e87 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -22,6 +22,7 @@ #include "SpillPlacement.h" #include "SplitKit.h" #include "VirtRegMap.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -33,11 +34,9 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineLoopRanges.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -68,7 +67,6 @@ class RAGreedy : public MachineFunctionPass, LiveStacks *LS; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; - MachineLoopRanges *LoopRanges; EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; @@ -76,6 +74,7 @@ class RAGreedy : public MachineFunctionPass, // state std::auto_ptr<Spiller> SpillerInstance; std::priority_queue<std::pair<unsigned, unsigned> > Queue; + unsigned NextCascade; // Live ranges pass through a number of stages as we try to allocate them. // Some of the stages may also create new live ranges: @@ -101,29 +100,49 @@ class RAGreedy : public MachineFunctionPass, static const char *const StageName[]; - IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage; + // RegInfo - Keep additional information about each live range. + struct RegInfo { + LiveRangeStage Stage; + + // Cascade - Eviction loop prevention. See canEvictInterference(). + unsigned Cascade; + + RegInfo() : Stage(RS_New), Cascade(0) {} + }; + + IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return LiveRangeStage(LRStage[VirtReg.reg]); + return ExtraRegInfo[VirtReg.reg].Stage; + } + + void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + ExtraRegInfo[VirtReg.reg].Stage = Stage; } template<typename Iterator> void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { unsigned Reg = (*Begin)->reg; - if (LRStage[Reg] == RS_New) - LRStage[Reg] = NewStage; + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = NewStage; } } - // Eviction. 
Sometimes an assigned live range can be evicted without - // conditions, but other times it must be split after being evicted to avoid - // infinite loops. - enum CanEvict { - CE_Never, ///< Can never evict. - CE_Always, ///< Can always evict. - CE_WithSplit ///< Can evict only if range is also split or spilled. + /// Cost of evicting interference. + struct EvictionCost { + unsigned BrokenHints; ///< Total number of broken hints. + float MaxWeight; ///< Maximum spill weight evicted. + + EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + + bool operator<(const EvictionCost &O) const { + if (BrokenHints != O.BrokenHints) + return BrokenHints < O.BrokenHints; + return MaxWeight < O.MaxWeight; + } }; // splitting state. @@ -139,11 +158,13 @@ class RAGreedy : public MachineFunctionPass, /// Global live range splitting candidate info. struct GlobalSplitCandidate { unsigned PhysReg; + InterferenceCache::Cursor Intf; BitVector LiveBundles; SmallVector<unsigned, 8> ActiveBlocks; - void reset(unsigned Reg) { + void reset(InterferenceCache &Cache, unsigned Reg) { PhysReg = Reg; + Intf.setPhysReg(Cache, Reg); LiveBundles.clear(); ActiveBlocks.clear(); } @@ -185,13 +206,15 @@ private: float calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, float&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); - void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor); - float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor); + void growRegion(GlobalSplitCandidate &Cand); + float calcGlobalSplitCost(GlobalSplitCandidate&); void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, SmallVectorImpl<LiveInterval*>&); void calcGapWeights(unsigned, SmallVectorImpl<float>&); - CanEvict canEvict(LiveInterval &A, LiveInterval &B); - bool canEvictInterference(LiveInterval&, unsigned, float&); + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + void evictInterference(LiveInterval&, unsigned, + SmallVectorImpl<LiveInterval*>&); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); @@ -228,18 +251,17 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { +RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); @@ -264,8 +286,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineDominatorTree>(); 
AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<MachineLoopRanges>(); - AU.addPreserved<MachineLoopRanges>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); AU.addRequired<EdgeBundles>(); @@ -308,13 +328,13 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { // LRE may clone a virtual register because dead code elimination causes it to // be split into connected components. Ensure that the new register gets the // same stage as the parent. - LRStage.grow(New); - LRStage[New] = LRStage[Old]; + ExtraRegInfo.grow(New); + ExtraRegInfo[New] = ExtraRegInfo[Old]; } void RAGreedy::releaseMemory() { SpillerInstance.reset(0); - LRStage.clear(); + ExtraRegInfo.clear(); GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -328,11 +348,11 @@ void RAGreedy::enqueue(LiveInterval *LI) { "Can only enqueue virtual registers"); unsigned Prio; - LRStage.grow(Reg); - if (LRStage[Reg] == RS_New) - LRStage[Reg] = RS_First; + ExtraRegInfo.grow(Reg); + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = RS_First; - if (LRStage[Reg] == RS_Second) + if (ExtraRegInfo[Reg].Stage == RS_Second) // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Long ranges are allocated last so // they are split against realistic interference. @@ -375,7 +395,21 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; - // PhysReg is available. Try to evict interference from a cheaper alternative. + // PhysReg is available, but there may be a better choice. + + // If we missed a simple hint, try to cheaply evict interference from the + // preferred register. + if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Order.isHint(Hint)) { + DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); + EvictionCost MaxCost(1); + if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { + evictInterference(VirtReg, Hint, NewVRegs); + return Hint; + } + } + + // Try to evict interference from a cheaper alternative. unsigned Cost = TRI->getCostPerUse(PhysReg); // Most registers have 0 additional cost. @@ -393,31 +427,58 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -/// canEvict - determine if A can evict the assigned live range B. The eviction -/// policy defined by this function together with the allocation order defined -/// by enqueue() decides which registers ultimately end up being split and -/// spilled. +/// shouldEvict - determine if A should evict the assigned live range B. The +/// eviction policy defined by this function together with the allocation order +/// defined by enqueue() decides which registers ultimately end up being split +/// and spilled. +/// +/// Cascade numbers are used to prevent infinite loops if this function is a +/// cyclic relation. /// -/// This function must define a non-circular relation when it returns CE_Always, -/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second -/// range, it is possible to return CE_WithSplit which forces the evicted -/// register to be split or spilled before it can evict anything again. That -/// guarantees progress. -RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) { - return A.weight > B.weight ? CE_Always : CE_Never; +/// @param A The live range to be assigned. 
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) <= RS_Second;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
}

-/// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted.
-/// Return false if any interference is heavier than MaxWeight.
-/// On return, set MaxWeight to the maximal spill weight of an interference.
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
- float &MaxWeight) {
- float Weight = 0;
+ bool IsHint, EvictionCost &MaxCost) {
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
// If there are 10 or more interferences, chances are one is heavier.
- if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
+ if (Q.collectInterferingVRegs(10) >= 10)
return false;
// Check if any interfering live range is heavier than MaxWeight.
@@ -425,25 +486,69 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
return false;
- if (Intf->weight >= MaxWeight)
- return false;
- switch (canEvict(VirtReg, *Intf)) {
- case CE_Always:
- break;
- case CE_Never:
+ // Never evict spill products. They cannot be split or spilled.
+ if (getStage(*Intf) == RS_Spill)
return false;
- case CE_WithSplit:
- if (getStage(*Intf) > RS_Second)
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
return false;
- break;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
}
- Weight = std::max(Weight, Intf->weight);
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
}
}
- MaxWeight = Weight;
+ MaxCost = Cost;
return true;
}

+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i];
+ unassign(*Intf, VRM->getPhys(Intf->reg));
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+ }
+}
+
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
@@ -454,31 +559,37 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
unsigned CostPerUseLimit) {
NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
- // Keep track of the lightest single interference seen so far.
- float BestWeight = HUGE_VALF;
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
unsigned BestPhys = 0;
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+ }
+
Order.rewind();
while (unsigned PhysReg = Order.next()) {
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
- // The first use of a register in a function has cost 1.
- if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
- continue;
-
- float Weight = BestWeight;
- if (!canEvictInterference(VirtReg, PhysReg, Weight))
- continue;
-
- // This is an eviction candidate.
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
- << Weight << '\n');
- if (BestPhys && Weight >= BestWeight)
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1) + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(CSR, TRI) << '\n'); + continue; + } + + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) continue; // Best so far. BestPhys = PhysReg; - BestWeight = Weight; + // Stop if the hint can be used. if (Order.isHint(PhysReg)) break; @@ -487,22 +598,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, if (!BestPhys) return 0; - DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n"); - for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - ++NumEvicted; - NewVRegs.push_back(Intf); - // Prevent looping by forcing the evicted ranges to be split before they - // can evict anything else. - if (getStage(*Intf) < RS_Second && - canEvict(VirtReg, *Intf) == CE_WithSplit) - LRStage[Intf->reg] = RS_Second; - } - } + evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; } @@ -621,8 +717,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); } -void RAGreedy::growRegion(GlobalSplitCandidate &Cand, - InterferenceCache::Cursor Intf) { +void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Keep track of through blocks that have not been added to SpillPlacer. BitVector Todo = SA->getThroughBlocks(); SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; @@ -633,8 +728,6 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand, for (;;) { ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); - if (NewBundles.empty()) - break; // Find new through blocks in the periphery of PrefRegBundles. for (int i = 0, e = NewBundles.size(); i != e; ++i) { unsigned Bundle = NewBundles[i]; @@ -654,12 +747,12 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand, } } // Any new blocks to add? - if (ActiveBlocks.size() > AddedTo) { - ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], - ActiveBlocks.size() - AddedTo); - addThroughConstraints(Intf, Add); - AddedTo = ActiveBlocks.size(); - } + if (ActiveBlocks.size() == AddedTo) + break; + addThroughConstraints(Cand.Intf, + ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo)); + AddedTo = ActiveBlocks.size(); + // Perhaps iterating can enable more bundles? SpillPlacer->iterate(); } @@ -697,8 +790,7 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, - InterferenceCache::Cursor Intf) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { float GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); @@ -725,8 +817,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, continue; if (RegIn && RegOut) { // We need double spill code if this block has interference. 
- Intf.moveToBlock(Number); - if (Intf.hasInterference()) + Cand.Intf.moveToBlock(Number); + if (Cand.Intf.hasInterference()) GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); continue; } @@ -756,188 +848,42 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, dbgs() << ".\n"; }); - InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); + InterferenceCache::Cursor &Intf = Cand.Intf; LiveRangeEdit LREdit(VirtReg, NewVRegs, this); SE->reset(LREdit); // Create the main cross-block interval. const unsigned MainIntv = SE->openIntv(); - // First add all defs that are live out of a block. + // First handle all the blocks with uses. ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + bool RegIn = BI.LiveIn && + LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; + bool RegOut = BI.LiveOut && + LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; // Create separate intervals for isolated blocks with multiple uses. - if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + if (!RegIn && !RegOut) { DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); - SE->splitSingleBlock(BI); - SE->selectIntv(MainIntv); - continue; - } - - // Should the register be live out? - if (!BI.LiveOut || !RegOut) - continue; - - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" - << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); - - // The interference interval should either be invalid or overlap MBB. - assert((!Intf.hasInterference() || Intf.first() < Stop) - && "Bad interference"); - assert((!Intf.hasInterference() || Intf.last() > Start) - && "Bad interference"); - - // Check interference leaving the block. - if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", not live-through.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; + if (!BI.isOneInstr()) { + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); } - if (!RegIn) { - // Block is live-through, but entry bundle is on the stack. - // Reload just before the first use. - DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; - } - DEBUG(dbgs() << ", live-through.\n"); continue; } - // Block has interference. - DEBUG(dbgs() << ", interference to " << Intf.last()); - - if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) { - // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n'); - SE->useIntv(BI.FirstUse, Stop); - continue; - } - - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - if (Intf.last().getBoundaryIndex() < BI.LastUse) { - // There are interference-free uses at the end of the block. - // Find the first use that can get the live-out register. 
- SmallVectorImpl<SlotIndex>::const_iterator UI =
- std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
- Intf.last().getBoundaryIndex());
- assert(UI != SA->UseSlots.end() && "Couldn't find last use");
- SlotIndex Use = *UI;
- assert(Use <= BI.LastUse && "Couldn't find last use");
- // Only attempt a split before the last split point.
- if (Use.getBaseIndex() <= LastSplitPoint) {
- DEBUG(dbgs() << ", free use at " << Use << ".\n");
- SlotIndex SegStart = SE->enterIntvBefore(Use);
- assert(SegStart >= Intf.last() && "Couldn't avoid interference");
- assert(SegStart < LastSplitPoint && "Impossible split point");
- SE->useIntv(SegStart, Stop);
- continue;
- }
- }
-
- // Interference is after the last use.
- DEBUG(dbgs() << " after last use.\n");
- SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB);
- assert(SegStart >= Intf.last() && "Couldn't avoid interference");
- }
-
- // Now all defs leading to live bundles are handled, do everything else.
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
- bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
-
- // Is the register live-in?
- if (!BI.LiveIn || !RegIn)
- continue;
-
- // We have an incoming register. Check for interference.
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
Intf.moveToBlock(BI.MBB->getNumber());
- DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
- << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';'
- << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
- << ')');
-
- // Check interference entering the block.
- if (!Intf.hasInterference()) {
- // Block is interference-free.
- DEBUG(dbgs() << ", no interference");
- if (!BI.LiveThrough) {
- DEBUG(dbgs() << ", killed in block.\n");
- SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
- continue;
- }
- if (!RegOut) {
- SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
- // Block is live-through, but exit bundle is on the stack.
- // Spill immediately after the last use.
- if (BI.LastUse < LastSplitPoint) {
- DEBUG(dbgs() << ", uses, stack-out.\n");
- SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
- continue;
- }
- // The last use is after the last split point, it is probably an
- // indirect jump.
- DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
- << LastSplitPoint << ", stack-out.\n");
- SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint);
- SE->useIntv(Start, SegEnd);
- // Run a double interval from the split to the last use.
- // This makes it possible to spill the complement without affecting the
- // indirect branch.
- SE->overlapIntv(SegEnd, BI.LastUse);
- continue;
- }
- // Register is live-through.
- DEBUG(dbgs() << ", uses, live-through.\n");
- SE->useIntv(Start, Stop);
- continue;
- }
-
- // Block has interference.
- DEBUG(dbgs() << ", interference from " << Intf.first());
-
- if (!BI.LiveThrough && Intf.first() >= BI.LastUse) {
- // The interference doesn't reach the outgoing segment.
- DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n');
- SE->useIntv(Start, BI.LastUse);
- continue;
- }
-
- if (Intf.first().getBaseIndex() > BI.FirstUse) {
- // There are interference-free uses at the beginning of the block.
- // Find the last use that can get the register.
- SmallVectorImpl<SlotIndex>::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.first().getBaseIndex()); - assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); - SlotIndex Use = (--UI)->getBoundaryIndex(); - DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE->leaveIntvAfter(Use); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); - SE->useIntv(Start, SegEnd); - continue; - } - - // Interference is before the first use. - DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); + if (RegIn && RegOut) + SE->splitLiveThroughBlock(BI.MBB->getNumber(), + MainIntv, Intf.first(), + MainIntv, Intf.last()); + else if (RegIn) + SE->splitRegInBlock(BI, MainIntv, Intf.first()); + else + SE->splitRegOutBlock(BI, MainIntv, Intf.last()); } // Handle live-through blocks. @@ -945,20 +891,11 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; - DEBUG(dbgs() << "Live through BB#" << Number << '\n'); - if (RegIn && RegOut) { - Intf.moveToBlock(Number); - if (!Intf.hasInterference()) { - SE->useIntv(Indexes->getMBBStartIdx(Number), - Indexes->getMBBEndIdx(Number)); - continue; - } - } - MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (RegIn) - SE->leaveIntvAtTop(*MBB); - if (RegOut) - SE->enterIntvAtEnd(*MBB); + if (!RegIn && !RegOut) + continue; + Intf.moveToBlock(Number); + SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(), + RegOut ? MainIntv : 0, Intf.last()); } ++NumGlobalSplits; @@ -967,7 +904,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); // Sort out the new intervals created by splitting. We get four kinds: @@ -976,27 +913,27 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - unsigned Reg = LREdit.get(i)->reg; + LiveInterval &Reg = *LREdit.get(i); // Ignore old intervals from DCE. - if (LRStage[Reg] != RS_New) + if (getStage(Reg) != RS_New) continue; // Remainder interval. Don't try splitting again, spill if it doesn't // allocate. if (IntvMap[i] == 0) { - LRStage[Reg] = RS_Global; + setStage(Reg, RS_Global); continue; } // Main interval. Allow repeated splitting as long as the number of live // blocks is strictly decreasing. if (IntvMap[i] == MainIntv) { - if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) { + if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks << " blocks as original.\n"); // Don't allow repeated splitting as a safe guard against looping. 
- LRStage[Reg] = RS_Global; + setStage(Reg, RS_Global); } continue; } @@ -1015,17 +952,34 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); const unsigned NoCand = ~0u; unsigned BestCand = NoCand; + unsigned NumCands = 0; Order.rewind(); - for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { - if (GlobalCand.size() <= Cand) - GlobalCand.resize(Cand+1); - GlobalCand[Cand].reset(PhysReg); + while (unsigned PhysReg = Order.next()) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). + if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned i = 0; i != NumCands; ++i) { + if (i == BestCand) + continue; + unsigned Count = GlobalCand[i].LiveBundles.count(); + if (Count < WorstCount) + Worst = i, WorstCount = Count; + } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + } + + if (GlobalCand.size() <= NumCands) + GlobalCand.resize(NumCands+1); + GlobalSplitCandidate &Cand = GlobalCand[NumCands]; + Cand.reset(IntfCache, PhysReg); - SpillPlacer->prepare(GlobalCand[Cand].LiveBundles); + SpillPlacer->prepare(Cand.LiveBundles); float Cost; - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - if (!addSplitConstraints(Intf, Cost)) { + if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } @@ -1040,28 +994,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, }); continue; } - growRegion(GlobalCand[Cand], Intf); + growRegion(Cand); SpillPlacer->finish(); // No live bundles, defer to splitSingleBlocks(). - if (!GlobalCand[Cand].LiveBundles.any()) { + if (!Cand.LiveBundles.any()) { DEBUG(dbgs() << " no bundles.\n"); continue; } - Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); + Cost += calcGlobalSplitCost(Cand); DEBUG({ dbgs() << ", total = " << Cost << " with bundles"; - for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; - i = GlobalCand[Cand].LiveBundles.find_next(i)) + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); if (Cost < BestCost) { - BestCand = Cand; + BestCand = NumCands; BestCost = Hysteresis * Cost; // Prevent rounding effects. } + ++NumCands; } if (BestCand == NoCand) @@ -1302,10 +1257,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (NewGaps >= NumGaps) { DEBUG(dbgs() << "Tagging non-progress ranges: "); assert(!ProgressRequired && "Didn't make progress when it was required."); - LRStage.resize(MRI->getNumVirtRegs()); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - LRStage[LREdit.get(i)->reg] = RS_Local; + setStage(*LREdit.get(i), RS_Local); DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); } DEBUG(dbgs() << '\n'); @@ -1384,7 +1338,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, return PhysReg; LiveRangeStage Stage = getStage(VirtReg); - DEBUG(dbgs() << StageName[Stage] << '\n'); + DEBUG(dbgs() << StageName[Stage] + << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. 
The RS_Second ranges already failed to do this, and they should not
@@ -1399,7 +1354,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
if (Stage == RS_First) {
- LRStage[VirtReg.reg] = RS_Second;
+ setStage(VirtReg, RS_Second);
DEBUG(dbgs() << "wait for second round\n");
NewVRegs.push_back(&VirtReg);
return 0;
@@ -1407,7 +1362,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class will report it.
- if (Stage >= RS_Spill)
+ if (Stage >= RS_Spill || !VirtReg.isSpillable())
return ~0u;
// Try splitting VirtReg or interferences.
@@ -1443,15 +1398,15 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DomTree = &getAnalysis<MachineDominatorTree>();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
- LoopRanges = &getAnalysis<MachineLoopRanges>();
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
- LRStage.clear();
- LRStage.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
allocatePhysRegs();
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5ef88cb..0dd3c598 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -16,7 +16,9 @@
#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
+#include "RegisterClassInfo.h"
#include "Spiller.h"
+#include "RegisterCoalescer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -27,7 +29,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -57,11 +58,6 @@ NewHeuristic("new-spilling-heuristic",
cl::init(false), cl::Hidden);
static cl::opt<bool>
-PreSplitIntervals("pre-alloc-split",
- cl::desc("Pre-register allocation live interval splitting"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
TrivCoalesceEnds("trivial-coalesce-ends",
cl::desc("Attempt trivial coalescing of interval ends"),
cl::init(false), cl::Hidden);
@@ -100,10 +96,9 @@ namespace {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerAnalysisGroup(
+ initializeRegisterCoalescerPass(
*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
- initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -148,6 +143,7 @@ namespace {
BitVector reservedRegs_;
LiveIntervals* li_;
MachineLoopInfo *loopInfo;
+ RegisterClassInfo RegClassInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
/// start value. This is used for backtracking.
@@ -215,8 +211,6 @@ namespace {
// to coalescing and which analyses coalescing invalidates.
AU.addRequiredTransitive<RegisterCoalescer>();
AU.addRequired<CalculateSpillWeights>();
- if (PreSplitIntervals)
- AU.addRequiredID(PreAllocSplittingID);
AU.addRequiredID(LiveStacksID);
AU.addPreservedID(LiveStacksID);
AU.addRequired<MachineLoopInfo>();
@@ -366,13 +360,10 @@ namespace {
/// getFirstNonReservedPhysReg - return the first non-reserved physical
/// register in the register class.
unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
- TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_);
- TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_);
- while (i != aoe && reservedRegs_.test(*i))
- ++i;
- assert(i != aoe && "All registers reserved?!");
- return *i;
- }
+ ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
+ assert(!O.empty() && "All registers reserved?!");
+ return O.front();
+ }
void ComputeRelatedRegClasses();
@@ -402,11 +393,10 @@ INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
-INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
"Linear Scan Register Allocator", false, false)
@@ -524,6 +514,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
loopInfo = &getAnalysis<MachineLoopInfo>();
+ RegClassInfo.runOnMachineFunction(fn);
// We don't run the coalescer here because we have no reason to
// interact with it. If the coalescer requires interaction, it
@@ -1166,14 +1157,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
bool Found = false;
std::vector<std::pair<unsigned,float> > RegsWeights;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
if (!minReg || SpillWeights[minReg] == HUGE_VALF)
- for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
- e = RC->allocation_order_end(*mf_); i != e; ++i) {
- unsigned reg = *i;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
float regWeight = SpillWeights[reg];
- // Don't even consider reserved regs.
- if (reservedRegs_.test(reg))
- continue;
// Skip recently allocated registers and reserved registers.
if (minWeight > regWeight && !isRecentlyUsed(reg))
Found = true;
@@ -1182,11 +1170,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// If we didn't find a register that is spillable, try aliases?
if (!Found) {
- for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
- e = RC->allocation_order_end(*mf_); i != e; ++i) {
- unsigned reg = *i;
- if (reservedRegs_.test(reg))
- continue;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
// No need to worry about whether the alias register size < regsize of RC.
// We are going to spill all registers that alias it anyway.
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -1446,13 +1431,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) physReg = vrm_->getPhys(physReg); - TargetRegisterClass::iterator I, E; - tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_); - assert(I != E && "No allocatable register in this register class!"); + ArrayRef<unsigned> Order; + if (Hint.first) + Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); + else + Order = RegClassInfo.getOrder(RC); + + assert(!Order.empty() && "No allocatable register in this register class!"); // Scan for the first available register. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; @@ -1482,8 +1471,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // inactive count. Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be // reevaluated now. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 1e1f1e0..72230d4 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -35,6 +35,7 @@ #include "Splitter.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "RegisterCoalescer.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -46,7 +47,6 @@ #include "llvm/CodeGen/PBQP/Graph.h" #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -84,11 +84,11 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(std::auto_ptr<PBQPBuilder> b) - : MachineFunctionPass(ID), builder(b) { + RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -122,6 +122,8 @@ private: std::auto_ptr<PBQPBuilder> builder; + char *customPassID; + MachineFunction *mf; const TargetMachine *tm; const TargetRegisterInfo *tri; @@ -222,10 +224,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. 
typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf), - aoEnd = trc->allocation_order_end(*mf); - aoItr != aoEnd; ++aoItr) { - unsigned preg = *aoItr; + ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf); + for (unsigned i = 0; i != rawOrder.size(); ++i) { + unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { vrAllowed.push_back(preg); } @@ -450,6 +451,8 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); au.addRequired<RegisterCoalescer>(); + if (customPassID) + au.addRequiredID(*customPassID); au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); @@ -581,7 +584,7 @@ void RegAllocPBQP::finalizeAlloc() const { if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = *liRC->allocation_order_begin(*mf); + physReg = liRC->getRawAllocationOrder(*mf).front(); } vrm->assignVirt2Phys(li->reg, physReg); @@ -703,8 +706,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { } FunctionPass* llvm::createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder> builder) { - return new RegAllocPBQP(builder); + std::auto_ptr<PBQPBuilder> builder, + char *customPassID) { + return new RegAllocPBQP(builder, customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 75b0c90..5a77e47 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -81,11 +81,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. - TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); - - for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - unsigned PhysReg = *I; + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF); + for (unsigned i = 0; i != RawOrder.size(); ++i) { + unsigned PhysReg = RawOrder[i]; // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; @@ -103,7 +101,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { DEBUG({ dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; - for (unsigned I = 0; I != N; ++I) + for (unsigned I = 0; I != RCI.NumRegs; ++I) dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); dbgs() << " ]\n"; }); diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h index 6f7d9c9..d21fd67 100644 --- a/lib/CodeGen/RegisterClassInfo.h +++ b/lib/CodeGen/RegisterClassInfo.h @@ -112,7 +112,7 @@ public: /// register, so a register allocator needs to track its liveness and /// availability. 
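// Illustrative sketch, not part of this patch, of what a RegisterClassInfo-
// style cache does once per function: filter reserved registers out of the
// raw allocation order so allocators never re-test them. std::vector<bool>
// stands in for llvm::BitVector; names are hypothetical.
#include <vector>

std::vector<unsigned> computeOrder(const std::vector<unsigned> &RawOrder,
                                   const std::vector<bool> &Reserved) {
  std::vector<unsigned> Order;
  for (unsigned i = 0; i != RawOrder.size(); ++i)
    if (!Reserved[RawOrder[i]])   // drop reserved registers once, up front
      Order.push_back(RawOrder[i]);
  return Order;                   // cached and reused by every later query
}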
bool isAllocatable(unsigned PhysReg) const { - return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg); + return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg); } }; } // end namespace llvm diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 407559a..b91f92c 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -13,38 +13,92 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/RegisterCoalescer.h" +#define DEBUG_TYPE "regcoalescing" +#include "RegisterCoalescer.h" +#include "VirtRegMap.h" +#include "LiveDebugVariables.h" + +#include "llvm/Pass.h" +#include "llvm/Value.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Pass.h" - +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <cmath> using namespace llvm; -// Register the RegisterCoalescer interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", - SimpleRegisterCoalescing) -char RegisterCoalescer::ID = 0; +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); -// RegisterCoalescer destructor: DO NOT move this to the header file -// for RegisterCoalescer or else clients of the RegisterCoalescer -// class may not depend on the RegisterCoalescer.o file in the current -// .a file, causing alias analysis support to not be included in the -// tool correctly! 
-// -RegisterCoalescer::~RegisterCoalescer() {} +static cl::opt<bool> +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt<bool> +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), + cl::init(false), cl::Hidden); -unsigned CoalescerPair::compose(unsigned a, unsigned b) const { +static cl::opt<bool> +EnablePhysicalJoin("join-physregs", + cl::desc("Join physical register copies"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + +INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) + +char RegisterCoalescer::ID = 0; + +static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { if (!a) return b; if (!b) return a; - return tri_.composeSubRegIndices(a, b); + return tri.composeSubRegIndices(a, b); } -bool CoalescerPair::isMoveInstr(const MachineInstr *MI, - unsigned &Src, unsigned &Dst, - unsigned &SrcSub, unsigned &DstSub) const { +static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) { if (MI->isCopy()) { Dst = MI->getOperand(0).getReg(); DstSub = MI->getOperand(0).getSubReg(); @@ -52,7 +106,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI, SrcSub = MI->getOperand(1).getSubReg(); } else if (MI->isSubregToReg()) { Dst = MI->getOperand(0).getReg(); - DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + DstSub = compose(tri, MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); } else @@ -66,7 +121,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { flipped_ = crossClass_ = false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) return false; partial_ = SrcSub || DstSub; @@ -156,7 +211,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (!MI) return false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) return false; // Find the virtual register that is srcReg_. @@ -185,13 +240,1558 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (dstReg_ != Dst) return false; // Registers match, do the subregisters line up? 
- return compose(subIdx_, SrcSub) == DstSub; + return compose(tri_, subIdx_, SrcSub) == DstSub; + } +} + +void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { + /// Joined copies are not deleted immediately, but kept in JoinedCopies. + JoinedCopies.insert(CopyMI); + + /// Mark all register operands of CopyMI as <undef> so they won't affect dead + /// code elimination. + for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), + E = CopyMI->operands_end(); I != E; ++I) + if (I->isReg()) + I->setIsUndef(true); +} + +/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// being the source and IntB being the dest, thus this defines a value number +/// in IntB. If the source value number (in IntA) is defined by a copy from B, +/// see if we can merge these two pieces of B into a single value number, +/// eliminating a copy. For example: +/// +/// A3 = B0 +/// ... +/// B1 = A3 <- this copy +/// +/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 +/// value number to be replaced with B0 (which simplifies the B live interval). +/// +/// This returns true if an interval was modified. +/// +bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above. + LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); + if (BLR == IntB.end()) return false; + VNInfo *BValNo = BLR->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (!BValNo->isDefByCopy()) return false; + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; + VNInfo *AValNo = ALR->valno; + // If it's re-defined by an early clobber somewhere in the live range, then + // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149: + // 172 %ECX<def> = MOV32rr %reg1039<kill> + // 180 INLINEASM <es:subl $5,$1 + // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, + // %EAX<kill>, + // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 + // 188 %EAX<def> = MOV32rr %EAX<kill> + // 196 %ECX<def> = MOV32rr %ECX<kill> + // 204 %ECX<def> = MOV32rr %ECX<kill> + // 212 %EAX<def> = MOV32rr %EAX<kill> + // 220 %EAX<def> = MOV32rr %EAX + // 228 %reg1039<def> = MOV32rr %ECX<kill> + // The early clobber operand ties ECX input to the ECX def. + // + // The live interval of ECX is represented as this: + // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) + // The coalescer has no idea there was a def in the middle of [174,230]. + if (AValNo->hasRedefByEC()) + return false; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + if (!CP.isCoalescable(AValNo->getCopy())) + return false; + + // Get the LiveRange in IntB that this value number starts with. + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); + if (ValLR == IntB.end()) + return false; + + // Make sure that the end of the live range is inside the same block as + // CopyMI. + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValLR ends in the same block that the CopyMI + // live-range starts. If there are no intervening live ranges between them in + // IntB, we can merge them. + if (ValLR+1 != BLR) return false; + + // If a live interval is a physical register, conservatively check if any + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { + DEBUG({ + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); + }); + return false; + } + } + + DEBUG({ + dbgs() << "Extending: "; + IntB.print(dbgs(), tri_); + }); + + SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + BValNo->setCopy(0); + + // Okay, we can merge them. We need to insert a new liverange: + // [ValLR.end, BLR.begin) of either value number, then we merge the + // two value numbers. + IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + + // If the IntB live range is assigned to a physical register, and if that + // physreg has sub-registers, update their live intervals as well. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &SRLI = li_->getInterval(*SR); + SRLI.addRange(LiveRange(FillerStart, FillerEnd, + SRLI.getNextValue(FillerStart, 0, + li_->getVNInfoAllocator()))); + } + } + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValLR->valno) { + // If B1 is killed by a PHI, then the merged live range must also be killed + // by the same PHI, as B0 and B1 can not overlap. 
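// Illustrative sketch, not part of this patch, of the AdjustCopiesBackFrom
// merge on toy data: ValLR is the earlier piece of B (B0's range), BLR the
// piece defined by the copy. Types and names here are hypothetical stand-ins,
// not LLVM API.
#include <cstdio>

struct ToyRange { unsigned start, end, valno; }; // half-open [start, end)

void mergeAcrossCopy(ToyRange &ValLR, ToyRange &BLR) {
  BLR.start = ValLR.end;   // insert the filler [ValLR.end, old BLR.start)
  BLR.valno = ValLR.valno; // merge B1's value number into B0's
}

int main() {
  ToyRange B0 = {10, 40, 0}, B1 = {50, 90, 1};
  mergeAcrossCopy(B0, B1);
  std::printf("[%u,%u):%u [%u,%u):%u\n", B0.start, B0.end, B0.valno,
              B1.start, B1.end, B1.valno); // prints: [10,40):0 [40,90):0
  return 0;
}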
+ bool HasPHIKill = BValNo->hasPHIKill(); + IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (HasPHIKill) + ValLR->valno->setHasPHIKill(true); + } + DEBUG({ + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValLREndInst->getOperand(UIdx).setIsKill(false); + } + + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. + if (ALR->end == CopyIdx) + li_->shrinkToUses(&IntA); + + ++numExtends; + return true; +} + +/// HasOtherReachingDefs - Return true if there are definitions of IntB +/// other than BValNo val# that can reach uses of AValNo val# of IntA. +bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + LiveInterval::Ranges::iterator BI = + std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); + if (BI != IntB.ranges.begin()) + --BI; + for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { + if (BI->valno == BValNo) + continue; + if (BI->start <= AI->start && BI->end > AI->start) + return true; + if (BI->start > AI->start && BI->start < AI->end) + return true; + } } + return false; } -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// IntA being the source and IntB being the dest, thus this defines a value +/// number in IntB. If the source value number (in IntA) is defined by a +/// commutable instruction and its other operand is coalesced to the copy dest +/// register, see if we can transform the copy into a noop by commuting the +/// definition. For example, +/// +/// A3 = op A2 B0<kill> +/// ... +/// B1 = A3 <- this copy +/// ... +/// = op A3 <- more uses +/// +/// ==> +/// +/// B2 = op B0 A2<kill> +/// ... +/// B1 = B2 <- now an identity copy +/// ... +/// = op B2 <- more uses +/// +/// This returns true if an interval was modified. +/// +bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // FIXME: For now, only eliminate the copy by commuting its def when the + // source register is a virtual register. We want to guard against cases + // where the copy is a back edge copy and commuting the def lengthens the + // live interval of the source register to the entire loop. + if (CP.isPhys() && CP.isFlipped()) + return false; + + // Bail if there is no dst interval. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); + if (!BValNo || !BValNo->isDefByCopy()) + return false; + + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + assert(AValNo && "COPY source not live"); + + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; + const MCInstrDesc &MCID = DefMI->getDesc(); + if (!MCID.isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else + return false; + + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (NewReg != IntB.reg || !NewDstMO.isKill()) + return false; + + // Make sure there are no other definitions of IntB that would reach the + // uses which the new definition can reach. + if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + return false; + + // Abort if the aliases of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && + HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) + return false; + + // If some of the uses of IntA.reg are already coalesced away, return false. + // It's not possible to determine whether it's safe to perform the coalescing. + for (MachineRegisterInfo::use_nodbg_iterator UI = + mri_->use_nodbg_begin(IntA.reg), + UE = mri_->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + SlotIndex UseIdx = li_->getInstructionIndex(UseMI); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end()) + continue; + if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + return false; + } + + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); + + // At this point we have decided that it is legal to do this + // transformation. Start by commuting the instruction. + MachineBasicBlock *MBB = DefMI->getParent(); + MachineInstr *NewMI = tii_->commuteInstruction(DefMI); + if (!NewMI) + return false; + if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && + TargetRegisterInfo::isVirtualRegister(IntB.reg) && + !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) + return false; + if (NewMI != DefMI) { + li_->ReplaceMachineInstrInMaps(DefMI, NewMI); + MBB->insert(DefMI, NewMI); + MBB->erase(DefMI); + } + unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); + NewMI->getOperand(OpIdx).setIsKill(); + + // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A<kill> + // ... + // = B + + // Update uses of IntA of the specific Val# with IntB.
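// Illustrative sketch, not part of this patch, of the commuting idea on a
// toy three-address instruction "dst = a op b"; the struct and function are
// hypothetical stand-ins, not LLVM API.
#include <algorithm>

struct ToyInstr { unsigned dst, a, b; };

// If the copy destination B is the second source of a commutable def, swap
// the sources and rename the def to B; the later "B = dst" copy then
// becomes an identity copy and can be erased.
bool commuteToKillCopy(ToyInstr &I, unsigned B) {
  if (I.b != B)
    return false;
  std::swap(I.a, I.b); // A3 = op A2 B0  ==>  A3 = op B0 A2
  I.dst = B;           // rename the def: B2 = op B0 A2
  return true;
}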
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), + UE = mri_->use_end(); UI != UE;) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + if (UseMI->isDebugValue()) { + // FIXME: These don't have an instruction index. Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } + SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end() || ULR->valno != AValNo) + continue; + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); + if (UseMI == CopyMI) + continue; + if (!UseMI->isCopy()) + continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. + SlotIndex DefIdx = UseIdx.getDefIndex(); + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) + continue; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + markAsJoined(UseMI); + } + + // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // is updated. + VNInfo *ValNo = BValNo; + ValNo->def = AValNo->def; + ValNo->setCopy(0); + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + } + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); + + IntA.removeValNo(AValNo); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); + ++numCommutes; + return true; +} + +/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// computation, replace the copy by rematerializing the definition. +bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, + bool preserveSrcInt, + unsigned DstReg, + unsigned DstSubIdx, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); + LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); + assert(SrcLR != SrcInt.end() && "Live range not found!"); + VNInfo *ValNo = SrcLR->valno; + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; + assert(DefMI && "Defining instruction disappeared"); + const MCInstrDesc &MCID = DefMI->getDesc(); + if (!MCID.isAsCheapAsAMove()) + return false; + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) + return false; + bool SawStore = false; + if (!DefMI->isSafeToMove(tii_, AA, SawStore)) + return false; + if (MCID.getNumDefs() != 1) + return false; + if (!DefMI->isImplicitDef()) { + // Make sure the copy destination register class fits the instruction + // definition register class. The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; + } + + // If the destination register has a sub-register index on it, make sure it + // matches the instruction register class. + if (DstSubIdx) { + const MCInstrDesc &MCID = DefMI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + RemoveCopyFlag(DstReg, CopyMI); + + MachineBasicBlock *MBB = CopyMI->getParent(); + MachineBasicBlock::iterator MII = + llvm::next(MachineBasicBlock::iterator(CopyMI)); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); + MachineInstr *NewMI = prior(MII); + + // CopyMI may have implicit operands; transfer them over to the newly + // rematerialized instruction. And update implicit def interval valnos. + for (unsigned i = CopyMI->getDesc().getNumOperands(), + e = CopyMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = CopyMI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); + } + + NewMI->copyImplicitOps(CopyMI); + li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ReMatCopies.insert(CopyMI); + ReMatDefs.insert(DefMI); + DEBUG(dbgs() << "Remat: " << *NewMI); + ++NumReMats; + + // The source interval can become smaller because we removed a use. + if (preserveSrcInt) + li_->shrinkToUses(&SrcInt); + + return true; +} + +/// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and +/// update the subregister number if it is not zero. If DstReg is a +/// physical register and the existing subregister number of the def / use +/// being updated is not zero, make sure to set it to the correct physical +/// subregister. +void +RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + // Update LiveDebugVariables. + ldv_->renameRegister(SrcReg, DstReg, SubIdx); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. + if (DstIsPhys) { + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } + + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands.
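// Illustrative sketch, not part of this patch, of the operand rewrite that
// follows: every use/def of SrcReg is redirected to DstReg while kill/dead
// flags are collected for reapplication. Subregister composition is omitted
// and all names are hypothetical stand-ins, not LLVM API.
#include <vector>

struct ToyOperand { unsigned reg; bool kill, dead; };

void substituteReg(std::vector<ToyOperand> &Ops, unsigned SrcReg,
                   unsigned DstReg, bool &Kills, bool &Deads) {
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    if (Ops[i].reg != SrcReg)
      continue;
    Kills |= Ops[i].kill; // collect flags so they can be reapplied to the
    Deads |= Ops[i].dead; // whole (possibly physical) destination register
    Ops[i].reg = DstReg;
  }
}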
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); + } + + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. + if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +/// removeIntervalIfEmpty - Check if the live interval of a physical register +/// is empty, if so remove it and also remove the empty intervals of its +/// sub-registers. Return true if live interval is removed. +static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, + const TargetRegisterInfo *tri_) { + if (li.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(li.reg)) + for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &sli = li_->getInterval(*SR); + if (sli.empty()) + li_->removeInterval(*SR); + } + li_->removeInterval(li.reg); + return true; + } + return false; +} + +/// RemoveDeadDef - If a def of a live interval is now determined dead, remove +/// the val# it defines. If the live interval becomes empty, remove it as well. +bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, + MachineInstr *DefMI) { + SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); + LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); + if (DefIdx != MLR->valno->def) + return false; + li.removeValNo(MLR->valno); + return removeIntervalIfEmpty(li, li_, tri_); +} + +void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } +} + +/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. +/// We need to be careful about coalescing a source physical register with a +/// virtual register. Once the coalescing is done, it cannot be broken and these +/// are not spillable! If the destination interval uses are far away, think +/// twice about coalescing them! +bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. 
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + if (!EnablePhysicalJoin) { + DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); + return false; + } + + // Only coalesce to allocatable physreg, we don't want to risk modifying + // reserved registers. + if (!Allocatable) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial()) { + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + return false; + } + } + return true; +} + +/// isWinToJoinCrossClass - Return true if it's profitable to coalesce +/// two virtual registers from different register classes. +bool +RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); + // This heuristic is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filters + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small, coalesce aggressively if + // that's the case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + + // Coalesce aggressively if the intervals are small compared to the number of + // registers in the new class. The number 4 is fairly arbitrary, chosen to be + // less aggressive than the 8 used for the whole function size. + const unsigned ThresSize = 4 * NewRCCount; + if (SrcSize <= ThresSize && DstSize <= ThresSize) + return true; + + // Estimate *register use density*. If it doubles or more, abort.
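// Standalone worked instance, not from this patch, of the use-density test
// implemented just below, with made-up numbers: the join is rejected when it
// would concentrate many uses on a small register class.
#include <cstdio>

int main() {
  unsigned SrcUses = 2, SrcSize = 40, SrcRCCount = 16; // source, before join
  unsigned DstUses = 30, DstSize = 20, NewRCCount = 4; // constrained result
  unsigned NewUses = SrcUses + DstUses;                // 32
  unsigned NewSize = SrcSize + DstSize;                // 60
  bool Abort = NewUses * SrcSize * SrcRCCount >        // 32*40*16 = 20480
               2 * SrcUses * NewSize * NewRCCount;     // 2*2*60*4 =   960
  std::printf("abort = %d\n", Abort);                  // prints: abort = 1
  return 0;
}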
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > ThresSize) { + unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > ThresSize) { + unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) + return false; + } + return true; +} + + +/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// which are the src/dst of the copy instruction CopyMI. This returns true +/// if the copy was successfully coalesced away. If it is not currently +/// possible to coalesce this interval, but it may be possible if other +/// things get coalesced, then it returns true by reference in 'Again'. +bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { + + Again = false; + if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) + return false; // Already done. + + DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + + // If they are already joined we continue. + if (CP.getSrcReg() == CP.getDstReg()) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tCopy already coalesced.\n"); + return false; // Not coalescable. + } + + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) + << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << "\n"); + + // Enforce policies. + if (CP.isPhys()) { + if (!shouldJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + return false; + } + } else { + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); + return false; + } + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (!JoinIntervals(CP)) { + // Coalescing failed. + + // If definition of source is defined by trivial computation, try + // rematerializing it. 
+ if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + + // If we can eliminate the copy without merging the live ranges, do so now. + if (!CP.isPartial()) { + if (AdjustCopiesBackFrom(CP, CopyMI) || + RemoveCopyByCommutingDef(CP, CopyMI)) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } + } + + // Otherwise, we are unable to join the intervals. + DEBUG(dbgs() << "\tInterference!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + + // Coalescing to a virtual register that is of a sub-register class of the + // other. Make sure the resulting register is set to the right register class. + if (CP.isCrossClass()) { + ++numCrossRCs; + mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); + } + + // Remember to delete the copy instruction. + markAsJoined(CopyMI); + + UpdateRegDefsUses(CP); + + // If we have extended the live range of a physical register, make sure we + // update live-in lists as well. + if (CP.isPhys()) { + SmallVector<MachineBasicBlock*, 16> BlockSeq; + // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the + // ranges for this, and they are preserved. + LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); + for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); + I != E; ++I ) { + li_->findLiveInMBBs(I->start, I->end, BlockSeq); + for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { + MachineBasicBlock &block = *BlockSeq[idx]; + if (!block.isLiveIn(CP.getDstReg())) + block.addLiveIn(CP.getDstReg()); + } + BlockSeq.clear(); + } + } + + // SrcReg is guaranteed to be the register whose live interval is + // being merged. + li_->removeInterval(CP.getSrcReg()); + + // Update regalloc hint. + tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); + + DEBUG({ + LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); + dbgs() << "\tJoined. Result = "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + ++numJoins; + return true; +} + +/// ComputeUltimateVN - Assuming we are going to join two live intervals, +/// compute what the resultant value numbers for each value in the two input +/// ranges will be. This is complicated by copies between the two which can +/// and will commonly cause multiple value numbers to be merged into one. +/// +/// VN is the value number that we're trying to resolve. NewVNInfo +/// keeps track of the value numbers created for the result +/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of +/// whether a value in this or other is a copy from the opposite set. +/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have +/// already been assigned. +/// +/// ThisFromOther[x] - If x is defined as a copy from the other interval, this +/// contains the value number the copy is from. +/// +static unsigned ComputeUltimateVN(VNInfo *VNI, + SmallVector<VNInfo*, 16> &NewVNInfo, + DenseMap<VNInfo*, VNInfo*> &ThisFromOther, + DenseMap<VNInfo*, VNInfo*> &OtherFromThis, + SmallVector<int, 16> &ThisValNoAssignments, + SmallVector<int, 16> &OtherValNoAssignments) { + unsigned VN = VNI->id; + + // If the VN has already been computed, just return it. + if (ThisValNoAssignments[VN] >= 0) + return ThisValNoAssignments[VN]; + assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); + + // If this val is not a copy from the other val, then it must be a new value + // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); + if (I == ThisFromOther.end()) { + NewVNInfo.push_back(VNI); + return ThisValNoAssignments[VN] = NewVNInfo.size()-1; + } + VNInfo *OtherValNo = I->second; + + // Otherwise, this *is* a copy from the RHS. If the other side has already + // been computed, return it. + if (OtherValNoAssignments[OtherValNo->id] >= 0) + return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; + + // Mark this value number as currently being computed, then ask what the + // ultimate value # of the other value is. + ThisValNoAssignments[VN] = -2; + unsigned UltimateVN = + ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, + OtherValNoAssignments, ThisValNoAssignments); + return ThisValNoAssignments[VN] = UltimateVN; +} + + +// Find out if we have something like +// A = X +// B = X +// if so, we can pretend this is actually +// A = X +// B = A +// which allows us to coalesce A and B. +// VNI is the definition of B. LR is the live range of A that includes +// the slot just before B. If we return true, we add "B = X" to DupCopies. +static bool RegistersDefinedFromSameValue(LiveIntervals &li, + const TargetRegisterInfo &tri, + CoalescerPair &CP, + VNInfo *VNI, + LiveRange *LR, + SmallVector<MachineInstr*, 8> &DupCopies) { + // FIXME: This is very conservative. For example, we don't handle + // physical registers. + + MachineInstr *MI = VNI->getCopy(); + + if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + return false; + + unsigned Dst = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(Src) || + !TargetRegisterInfo::isVirtualRegister(Dst)) + return false; + + unsigned A = CP.getDstReg(); + unsigned B = CP.getSrcReg(); + + if (B == Dst) + std::swap(A, B); + assert(Dst == A); + + VNInfo *Other = LR->valno; + if (!Other->isDefByCopy()) + return false; + const MachineInstr *OtherMI = Other->getCopy(); + + if (!OtherMI->isFullCopy()) + return false; + + unsigned OtherDst = OtherMI->getOperand(0).getReg(); + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || + !TargetRegisterInfo::isVirtualRegister(OtherDst)) + return false; + + assert(OtherDst == B); + + if (Src != OtherSrc) + return false; + + // If the copies use two different value numbers of X, we cannot merge + // A and B. + LiveInterval &SrcInt = li.getInterval(Src); + if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def)) + return false; + + DupCopies.push_back(MI); + + return true; +} + +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { + LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); + DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // If a live interval is a physical register, check for interference with any + // aliases. The interference check implemented here is a bit more conservative + // than the full interference check below. We allow overlapping live ranges + // only when one is a copy of the other.
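// Illustrative sketch, not part of this patch, of the ComputeUltimateVN
// resolution above on plain containers: value numbers that are copies of
// one another across the two sides collapse to one final id; -1 means
// unassigned and -2 marks a value currently in progress. All names here are
// hypothetical stand-ins, not LLVM API.
#include <cassert>
#include <cstdio>
#include <map>
#include <vector>

static int resolve(unsigned VN, std::vector<int> &ThisAssign,
                   std::vector<int> &OtherAssign,
                   const std::map<unsigned, unsigned> &ThisFromOther,
                   const std::map<unsigned, unsigned> &OtherFromThis,
                   unsigned &NextId) {
  if (ThisAssign[VN] >= 0)
    return ThisAssign[VN];                     // already computed
  assert(ThisAssign[VN] != -2 && "Cyclic value numbers");
  std::map<unsigned, unsigned>::const_iterator I = ThisFromOther.find(VN);
  if (I == ThisFromOther.end())
    return ThisAssign[VN] = NextId++;          // genuinely new value
  ThisAssign[VN] = -2;                         // mark as in progress
  return ThisAssign[VN] = resolve(I->second, OtherAssign, ThisAssign,
                                  OtherFromThis, ThisFromOther, NextId);
}

int main() {
  std::map<unsigned, unsigned> LFromR, RFromL;
  LFromR[0] = 0;                      // LHS value 0 is a copy of RHS value 0
  std::vector<int> L(1, -1), R(1, -1);
  unsigned NextId = 0;
  std::printf("%d\n", resolve(0, L, R, LFromR, RFromL, NextId)); // prints 0
  return 0;
}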
+ if (CP.isPhys()) { + for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ + if (!li_->hasInterval(*AS)) + continue; + const LiveInterval &LHS = li_->getInterval(*AS); + LiveInterval::const_iterator LI = LHS.begin(); + for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); + RI != RE; ++RI) { + LI = std::lower_bound(LI, LHS.end(), RI->start); + // Does LHS have an overlapping live range starting before RI? + if ((LI != LHS.begin() && LI[-1].end > RI->start) && + (RI->start != RI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; + }); + return false; + } + + // Check that LHS ranges beginning in this range are copies. + for (; LI != LHS.end() && LI->start < RI->end; ++LI) { + if (LI->start != LI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; + }); + return false; + } + } + } + } + } + + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector<int, 16> LHSValNoAssignments; + SmallVector<int, 16> RHSValNoAssignments; + DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; + DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; + SmallVector<VNInfo*, 16> NewVNInfo; + + SmallVector<MachineInstr*, 8> DupCopies; + + LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + MachineInstr *MI = VNI->getCopy(); + if (!CP.isCoalescable(MI) && + !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + continue; + + LHSValsDefinedFromRHS[VNI] = lr->valno; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. 
+ MachineInstr *MI = VNI->getCopy(); + if (!CP.isCoalescable(MI) && + !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + continue; + + RHSValsDefinedFromLHS[VNI] = lr->valno; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + } + + while (I != IE && J != JE) { + // Determine if these two live ranges overlap. + bool Overlaps; + if (I->start < J->start) { + Overlaps = I->end > J->start; + } else { + Overlaps = J->end > I->start; + } + + // If so, check value # info to determine if they are really different. + if (Overlaps) { + // If the live range overlap will map to the same value number in the + // result liverange, we can still coalesce them. If not, we can't. + if (LHSValNoAssignments[I->valno->id] != + RHSValNoAssignments[J->valno->id]) + return false; + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) + return false; + } + + if (I->end < J->end) + ++I; + else + ++J; + } + + // Update kill info. Some live ranges are extended due to copy coalescing. + for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), + E = LHSValsDefinedFromRHS.end(); I != E; ++I) { + VNInfo *VNI = I->first; + unsigned LHSValID = LHSValNoAssignments[VNI->id]; + if (VNI->hasPHIKill()) + NewVNInfo[LHSValID]->setHasPHIKill(true); + } + + // Update kill info. Some live ranges are extended due to copy coalescing. 
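// Illustrative sketch, not part of this patch, of the compatibility walk
// above: advance two sorted range lists in lockstep and fail if any
// overlapping pair maps to different final value numbers. Types and names
// are hypothetical stand-ins, not LLVM API.
#include <vector>

struct ToyLR { unsigned start, end; int finalVN; }; // sorted, half-open

bool rangesCompatible(const std::vector<ToyLR> &L,
                      const std::vector<ToyLR> &R) {
  unsigned i = 0, j = 0;
  while (i != L.size() && j != R.size()) {
    bool Overlaps = L[i].start < R[j].start ? L[i].end > R[j].start
                                            : R[j].end > L[i].start;
    if (Overlaps && L[i].finalVN != R[j].finalVN)
      return false;                 // same slots, different values: abort
    if (L[i].end < R[j].end) ++i; else ++j; // advance the range ending first
  }
  return true;                      // every overlap carries one value
}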
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), + E = RHSValsDefinedFromLHS.end(); I != E; ++I) { + VNInfo *VNI = I->first; + unsigned RHSValID = RHSValNoAssignments[VNI->id]; + if (VNI->hasPHIKill()) + NewVNInfo[RHSValID]->setHasPHIKill(true); + } + + if (LHSValNoAssignments.empty()) + LHSValNoAssignments.push_back(-1); + if (RHSValNoAssignments.empty()) + RHSValNoAssignments.push_back(-1); + + SmallVector<unsigned, 8> SourceRegisters; + for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), + E = DupCopies.end(); I != E; ++I) { + MachineInstr *MI = *I; + + // We have pretended that the assignment to B in + // A = X + // B = X + // was actually a copy from A. Now that we decided to coalesce A and B, + // transform the code into + // A = X + // X = X + // and mark the X as coalesced to keep the illusion. + unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); + MI->getOperand(0).substVirtReg(Src, 0, *tri_); + + markAsJoined(MI); + } + + // If B = X was the last use of X in a live range, we have to shrink it now + // that B = X is gone. + for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), + E = SourceRegisters.end(); I != E; ++I) { + li_->shrinkToUses(&li_->getInterval(*I)); + } + + // If we get here, we know that we can coalesce the live ranges. Ask the + // intervals to coalesce themselves now. + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + mri_); + return true; +} + +namespace { + // DepthMBBCompare - Comparison predicate that sort first based on the loop + // depth of the basic block (the unsigned), and then on the MBB number. + struct DepthMBBCompare { + typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; + bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { + // Deeper loops first + if (LHS.first != RHS.first) + return LHS.first > RHS.first; + + // Prefer blocks that are more connected in the CFG. This takes care of + // the most difficult copies first while intervals are short. + unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); + unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); + if (cl != cr) + return cl > cr; + + // As a last resort, sort by block number. + return LHS.second->getNumber() < RHS.second->getNumber(); + } + }; +} + +void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, + std::vector<MachineInstr*> &TryAgain) { + DEBUG(dbgs() << MBB->getName() << ":\n"); + + SmallVector<MachineInstr*, 8> VirtCopies; + SmallVector<MachineInstr*, 8> PhysCopies; + SmallVector<MachineInstr*, 8> ImpDefCopies; + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E;) { + MachineInstr *Inst = MII++; + + // If this isn't a copy or an extract_subreg, we can't join intervals.
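// Illustrative sketch, not part of this patch, of the DepthMBBCompare
// ordering above: deepest loops first, then better-connected blocks, then
// block number as a stable last resort. The struct is a hypothetical
// stand-in for (loop depth, MachineBasicBlock*) pairs.
#include <algorithm>
#include <vector>

struct ToyMBB { unsigned depth, conns, number; };

static bool depthCompare(const ToyMBB &A, const ToyMBB &B) {
  if (A.depth != B.depth) return A.depth > B.depth; // deeper loops first
  if (A.conns != B.conns) return A.conns > B.conns; // more CFG edges first
  return A.number < B.number;                       // stable last resort
}

void orderBlocks(std::vector<ToyMBB> &MBBs) {
  std::sort(MBBs.begin(), MBBs.end(), depthCompare);
}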
+ unsigned SrcReg, DstReg; + if (Inst->isCopy()) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(1).getReg(); + } else if (Inst->isSubregToReg()) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(2).getReg(); + } else + continue; + + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(Inst); + else if (SrcIsPhys || DstIsPhys) + PhysCopies.push_back(Inst); + else + VirtCopies.push_back(Inst); + } + + // Try coalescing implicit copies and insert_subreg <undef> first, + // followed by copies to / from physical registers, then finally copies + // from virtual registers to virtual registers. + for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = ImpDefCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = PhysCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = VirtCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } +} + +void RegisterCoalescer::joinIntervals() { + DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + + std::vector<MachineInstr*> TryAgainList; + if (loopInfo->empty()) { + // If there are no loops in the function, join intervals in function order. + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); + I != E; ++I) + CopyCoalesceInMBB(I, TryAgainList); + } else { + // Otherwise, join intervals in inner loops before other intervals. + // Unfortunately we can't just iterate over loop hierarchy here because + // there may be more MBB's than BB's. Collect MBB's for sorting. + + // Join intervals in the function prolog first. We want to join physical + // registers with virtual registers before the intervals got too long. + std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ + MachineBasicBlock *MBB = I; + MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); + } + + // Sort by loop depth. + std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); + + // Finally, join intervals in loop nest order. + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + } + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + MachineInstr *&TheCopy = TryAgainList[i]; + if (!TheCopy) + continue; + + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy= 0; // Mark this one as done. 
+ ProgressMade = true; + } + } + } +} + +void RegisterCoalescer::releaseMemory() { + JoinedCopies.clear(); + ReMatCopies.clear(); + ReMatDefs.clear(); +} + +bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &fn.getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + li_ = &getAnalysis<LiveIntervals>(); + ldv_ = &getAnalysis<LiveDebugVariables>(); + AA = &getAnalysis<AliasAnalysis>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); + + if (VerifyCoalescing) + mf_->verify(this, "Before register coalescing"); + + RegClassInfo.runOnMachineFunction(fn); + + // Join (coalesce) intervals if requested. + if (EnableJoining) { + joinIntervals(); + DEBUG({ + dbgs() << "********** INTERVALS POST JOINING **********\n"; + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); + I != E; ++I){ + I->second->print(dbgs(), tri_); + dbgs() << "\n"; + } + }); + } + + // Perform a final pass over the instructions and compute spill weights + // and remove identity moves. + SmallVector<unsigned, 4> DeadDefs; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ) { + MachineInstr *MI = mii; + if (JoinedCopies.count(MI)) { + // Delete all coalesced copies. + bool DoDelete = true; + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + MI->getNumOperands() > 2) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // delete them later. + DoDelete = false; + + if (MI->allDefsAreDead()) { + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + li_->hasInterval(SrcReg)) + li_->shrinkToUses(&li_->getInterval(SrcReg)); + DoDelete = true; + } + if (!DoDelete) { + // We need the instruction to adjust liveness, so make it a KILL. + if (MI->isSubregToReg()) { + MI->RemoveOperand(3); + MI->RemoveOperand(1); + } + MI->setDesc(tii_->get(TargetOpcode::KILL)); + mii = llvm::next(mii); + } else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + } + continue; + } + + // Now check if this is a remat'ed def instruction which is now dead. + if (ReMatDefs.count(MI)) { + bool isDead = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + DeadDefs.push_back(Reg); + if (MO.isDead()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !mri_->use_nodbg_empty(Reg)) { + isDead = false; + break; + } + } + if (isDead) { + while (!DeadDefs.empty()) { + unsigned DeadDef = DeadDefs.back(); + DeadDefs.pop_back(); + RemoveDeadDef(li_->getInterval(DeadDef), MI); + } + li_->RemoveMachineInstrFromMaps(mii); + mii = mbbi->erase(mii); + continue; + } else + DeadDefs.clear(); + } + + ++mii; + + // Check for now unnecessary kill flags. 
+ if (li_->isNotInMIMap(MI)) continue; + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + if (!li_->getInterval(reg).killedAt(DefIdx)) { + MO.setIsKill(false); + continue; + } + // When leaving a kill flag on a physreg, check if any subregs should + // remain alive. + if (!TargetRegisterInfo::isPhysicalRegister(reg)) + continue; + for (const unsigned *SR = tri_->getSubRegisters(reg); + unsigned S = *SR; ++SR) + if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) + MI->addRegisterDefined(S, tri_); + } + } + } + + DEBUG(dump()); + DEBUG(ldv_->dump()); + if (VerifyCoalescing) + mf_->verify(this, "After register coalescing"); + return true; +} + +/// print - Implement the dump method. +void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { + li_->print(O, m); +} + +RegisterCoalescer *llvm::createRegisterCoalescer() { + return new RegisterCoalescer(); +} diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/RegisterCoalescer.h index 92f6c64..4131d91 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===// +//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,37 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file implements a simple register copy coalescing phase. +// This file contains the abstract interface for register coalescers, +// allowing them to interact with and query register allocators. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H -#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "RegisterClassInfo.h" +#include "llvm/Support/IncludeFile.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" + +#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H +#define LLVM_CODEGEN_REGISTER_COALESCER_H namespace llvm { - class SimpleRegisterCoalescing; - class LiveDebugVariables; + + class MachineFunction; + class RegallocQuery; + class AnalysisUsage; + class MachineInstr; class TargetRegisterInfo; + class TargetRegisterClass; class TargetInstrInfo; + class LiveDebugVariables; class VirtRegMap; class MachineLoopInfo; - /// CopyRec - Representation for copy instructions in coalescer queue. - /// - struct CopyRec { - MachineInstr *MI; - unsigned LoopDepth; - CopyRec(MachineInstr *mi, unsigned depth) - : MI(mi), LoopDepth(depth) {} - }; + class CoalescerPair; - class SimpleRegisterCoalescing : public MachineFunctionPass, - public RegisterCoalescer { + /// An abstract interface for register coalescers. Coalescers must + /// implement this interface to be part of the coalescer analysis + /// group. + class RegisterCoalescer : public MachineFunctionPass { MachineFunction* mf_; MachineRegisterInfo* mri_; const TargetMachine* tm_; @@ -61,41 +62,20 @@ namespace llvm { /// been remat'ed. 
SmallPtrSet<MachineInstr*, 8> ReMatDefs;
- public:
- static char ID; // Pass identifcation, replacement for typeid
- SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
- initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual void releaseMemory();
-
- /// runOnMachineFunction - pass entry point
- virtual bool runOnMachineFunction(MachineFunction&);
-
- bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
- // This runs as an independent pass, so don't do anything.
- return false;
- }
-
- /// print - Implement the dump method.
- virtual void print(raw_ostream &O, const Module* = 0) const;
-
- private:
/// joinIntervals - join compatible live intervals
void joinIntervals();

/// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
/// copies that cannot yet be coalesced into the "TryAgain" list.
void CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<CopyRec> &TryAgain);
+ std::vector<MachineInstr*> &TryAgain);

/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
- bool JoinCopy(CopyRec &TheCopy, bool &Again);
+ bool JoinCopy(MachineInstr *TheCopy, bool &Again);

/// JoinIntervals - Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we can
@@ -155,8 +135,109 @@ namespace llvm {
/// markAsJoined - Remember that CopyMI has already been joined.
void markAsJoined(MachineInstr *CopyMI);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Register allocators must call this from their own
+ /// getAnalysisUsage to cover the case where the coalescer is not
+ /// a Pass in the proper sense and isn't managed by PassManager.
+ /// PassManager needs to know which analyses to make available and
+ /// which to invalidate when running the register allocator or any
+ /// pass that might call coalescing. The long-term solution is to
+ /// allow hierarchies of PassManagers.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
+
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetInstrInfo &tii_;
+ const TargetRegisterInfo &tri_;
+
+ /// dstReg_ - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned dstReg_;
+
+ /// srcReg_ - the virtual register that will be coalesced into dstReg.
+ unsigned srcReg_;
+
+ /// subIdx_ - The subregister index of srcReg in dstReg_. It is possible to
+ /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+ /// virtual register.
+ unsigned subIdx_;
+
+ /// partial_ - True when the original copy was a partial subregister copy.
+ bool partial_;
+
+ /// crossClass_ - True when both regs are virtual, and newRC is constrained.
+ bool crossClass_;
+
+ /// flipped_ - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool flipped_;
+
+ /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+ /// is a physreg.
+ const TargetRegisterClass *newRC_;
+
+ public:
+ CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+ : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+ partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+ /// because dstReg_ is a physical register, or subIdx_ is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !newRC_; }
+
+ /// isPartial - Return true if the original copy instruction did not copy the
+ /// full register, but was a subreg operation.
+ bool isPartial() const { return partial_; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return crossClass_; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return flipped_; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return dstReg_; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return srcReg_; }
+
+ /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+ /// coalesced into, or 0.
+ unsigned getSubIdx() const { return subIdx_; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return newRC_; } + }; } // End llvm namespace #endif diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index c8de382..8b02ec4 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -434,8 +434,7 @@ namespace llvm { rcEnd = tri->regclass_end(); rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - unsigned capacity = std::distance(trc->allocation_order_begin(*mf), - trc->allocation_order_end(*mf)); + unsigned capacity = trc->getRawAllocationOrder(*mf).size(); if (capacity != 0) capacityMap[trc] = capacity; @@ -482,8 +481,7 @@ namespace llvm { rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - if (trc->allocation_order_begin(*mf) == - trc->allocation_order_end(*mf)) + if (trc->getRawAllocationOrder(*mf).empty()) continue; unsigned worstAtI = getWorst(li->reg, trc); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1302395..21375b2 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,23 +19,33 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; +#ifndef NDEBUG +cl::opt<bool> StressSchedOpt( + "stress-sched", cl::Hidden, cl::init(false), + cl::desc("Stress test instruction scheduling")); +#endif + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), TRI(TM.getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { +#ifndef NDEBUG + StressSched = StressSchedOpt; +#endif } ScheduleDAG::~ScheduleDAG() {} /// getInstrDesc helper to handle SDNodes. 
-const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { +const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { if (!Node || !Node->isMachineOpcode()) return NULL; return &TII->get(Node->getMachineOpcode()); } @@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << G->TRI->getName(I->getReg()); dbgs() << "\n"; } } diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 6b7a8c64..f8b1bc7 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -45,6 +45,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, unsigned Reg = 0; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds if (II->getReg()) { Reg = II->getReg(); break; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 2363df4..446adfc 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,10 +21,11 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" @@ -205,7 +206,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { bool UnitLatencies = ForceUnitLatencies(); // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // Remove any stale debug info; sometimes BuildSchedGraph is called again @@ -236,13 +237,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); - assert(!TID.isTerminator() && !MI->isLabel() && + const MCInstrDesc &MCID = MI->getDesc(); + assert(!MCID.isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); - SU->isCall = TID.isCall(); - SU->isCommutable = TID.isCommutable(); + SU->isCall = MCID.isCall(); + SU->isCommutable = MCID.isCommutable(); // Assign the Latency field of SU using target-provided information. 
if (UnitLatencies)
@@ -309,13 +310,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (SpecialAddressLatency != 0 && !UnitLatencies &&
UseSU != &ExitSU) {
MachineInstr *UseMI = UseSU->getInstr();
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
if (RegUseIndex >= 0 &&
- (UseTID.mayLoad() || UseTID.mayStore()) &&
- (unsigned)RegUseIndex < UseTID.getNumOperands() &&
- UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
}
// Adjust the dependence latency using operand def/use
@@ -352,17 +353,17 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned Count = I->second.second;
const MachineInstr *UseMI = UseMO->getParent();
unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
// TODO: If we knew the total depth of the region here, we could
// handle the case where the whole loop is inside the region but
// is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseTID.getNumOperands()) {
+ if (UseMOIdx < UseMCID.getNumOperands()) {
// Currently, we only support scheduling regions consisting of
// single basic blocks. Check to see if the instruction is in
// the same region by checking to see if it has the same parent.
if (UseMI->getParent() != MI->getParent()) {
unsigned Latency = SU->Latency;
- if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
Latency += SpecialAddressLatency;
// This is a wild guess as to the portion of the latency which
// will be overlapped by work done outside the current
@@ -374,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*isMustAlias=*/false,
/*isArtificial=*/true));
} else if (SpecialAddressLatency > 0 &&
- UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
// The entire loop body is within the current scheduling region
// and the latency of this operation is assumed to be greater
// than the latency of the loop.
@@ -417,9 +418,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I = @@ -458,7 +459,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (TID.mayStore()) { + } else if (MCID.mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -514,7 +515,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (TID.mayLoad()) { + } else if (MCID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e6d7ded..0e005d3 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -16,11 +16,11 @@ #define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" using namespace llvm; @@ -115,12 +115,12 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - const TargetInstrDesc *TID = DAG->getInstrDesc(SU); - if (TID == NULL) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID == NULL) { // Don't check hazards for non-machineinstr Nodes. return NoHazard; } - unsigned idx = TID->getSchedClass(); + unsigned idx = MCID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the @@ -173,16 +173,16 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - const TargetInstrDesc *TID = DAG->getInstrDesc(SU); - assert(TID && "The scheduler must filter non-machineinstrs"); - if (DAG->TII->isZeroCost(TID->Opcode)) + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + assert(MCID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(MCID->Opcode)) return; ++IssueCount; unsigned cycle = 0; - unsigned idx = TID->getSchedClass(); + unsigned idx = MCID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e843f5f..4f0d2ca 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,10 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType); + /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. 
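For reference, the byte-swap identities the combines added in the following hunks rely on: MatchBSwapHWordLow rewrites ((x << 8) & 0xff00) | ((x >> 8) & 0xff) as (bswap x) >> 16, and MatchBSwapHWord rewrites ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) as (rotl (bswap x), 16). Both can be sanity-checked with a short standalone program; this is an illustrative sketch only, and bswap32/rotl32 are local helpers, not LLVM or libc APIs:

#include <cassert>
#include <cstdint>

using std::uint32_t;
using std::uint64_t;

// Reference byte swap and left rotate for 32-bit values.
static uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0xFF00u) | ((x << 8) & 0xFF0000u) | (x << 24);
}
static uint32_t rotl32(uint32_t x, unsigned n) {
  return (x << n) | (x >> (32u - n)); // only called with n in (0, 32)
}

int main() {
  for (uint64_t v = 0; v <= 0xFFFFFFFFull; v += 65537) {
    uint32_t x = (uint32_t)v;
    // MatchBSwapHWord: the packed halfword byteswap == rotl(bswap(x), 16).
    uint32_t hw = ((x & 0x000000FFu) << 8) | ((x & 0x0000FF00u) >> 8) |
                  ((x & 0x00FF0000u) << 8) | ((x & 0xFF000000u) >> 8);
    assert(hw == rotl32(bswap32(x), 16));
    // MatchBSwapHWordLow: the low-halfword byteswap == bswap(x) >> 16.
    uint32_t lo = ((x << 8) & 0xFF00u) | ((x >> 8) & 0x00FFu);
    assert(lo == (bswap32(x) >> 16));
    (void)hw; (void)lo; // keep -Wunused quiet when asserts are compiled out
  }
  return 0;
}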
@@ -234,6 +238,9 @@ namespace { SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -994,7 +1001,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); dbgs() << '\n'); - + // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); @@ -1303,16 +1310,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// isCarryMaterialization - Returns true if V is an ADDE node that is known to -/// return 0 or 1 depending on the carry flag. -static bool isCarryMaterialization(SDValue V) { - if (V.getOpcode() != ISD::ADDE) - return false; - - ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); - return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1476,18 +1473,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } - // add (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), - N0.getOperand(2)); - - // add X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), - N1.getOperand(2)); - return SDValue(); } @@ -1531,16 +1516,6 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } - // addc (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, - DAG.getConstant(0, VT), N0.getOperand(2)); - - // addc X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, - DAG.getConstant(0, VT), N1.getOperand(2)); - return SDValue(); } @@ -1591,6 +1566,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();

// fold vector ops
@@ -1622,6 +1599,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2508,6 +2491,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue();
}

+/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16 && + (!LookPassAnd0 || !LookPassAnd1) && + !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) + return SDValue(); + + SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + if (OpSizeInBits > 16) + Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + return Res; +} + +/// isBSwapHWordElement - Return true if the specified node is an element +/// that makes up a 32-bit packed halfword byteswap. i.e. +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!N1C) + return false; + + unsigned Num; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: Num = 0; break; + case 0xFF00: Num = 1; break; + case 0xFF0000: Num = 2; break; + case 0xFF000000: Num = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). + SDValue N0 = N.getOperand(0); + if (Opc == ISD::AND) { + if (Num == 0 || Num == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (N0.getOpcode() != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (N0.getOpcode() != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (Num != 0 && Num != 2) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (Num != 1 && Num != 3) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[Num]) + return false; + + Parts[Num] = N0.getOperand(0).getNode(); + return true; +} + +/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. 
That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2543,6 +2764,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) + SDValue BSwap = MatchBSwapHWord(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + BSwap = MatchBSwapHWordLow(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + // reassociate or SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); if (ROR.getNode() != 0) @@ -3030,6 +3260,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; + // fold (shl undef, x) -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) @@ -3696,6 +3929,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } +void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3784,27 +4039,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -3832,6 +4068,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. @@ -3990,27 +4265,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, - N->getDebugLoc(), VT, SOp)); - } + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - - return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4198,27 +4494,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4555,6 +4832,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} + + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) + if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode() != 0) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + BSwap, N1); + } + return SDValue(); } @@ -5180,7 +5467,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5204,7 +5492,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -5648,12 +5937,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), E = Ptr.getNode()->use_end(); I != E; ++I) { SDNode *Use = *I; if (Use == N) continue; - if (Use->isPredecessorOf(N)) + if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; if (!((Use->getOpcode() == ISD::LOAD && @@ -6431,8 +6725,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" SDValue Shorter = GetDemandedBits(Value, - APInt::getLowBitsSet(Value.getValueSizeInBits(), - ST->getMemoryVT().getSizeInBits())); + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, @@ -7156,7 +7451,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, const TargetData &TD = *TLI.getTargetData(); // Create a ConstantArray of the two constants. - Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 797f174..54a7d43 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -547,7 +547,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
const DbgValueInst *DI = cast<DbgValueInst>(Call); - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to @@ -556,9 +556,14 @@ bool FastISel::SelectCall(const User *I) { .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addCImm(CI).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addFPImm(CF).addImm(DI->getOffset()) @@ -847,7 +852,7 @@ FastISel::SelectExtractValue(const User *U) { return false; // fast-isel can't handle aggregate constants at the moment // Get the actual result register, which is an offset from the base register. - unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -1085,7 +1090,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; @@ -1095,7 +1100,7 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1115,7 +1120,7 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1137,7 +1142,7 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1160,7 +1165,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1181,7 +1186,7 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, 
unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1204,7 +1209,7 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1226,7 +1231,7 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1248,7 +1253,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); @@ -1264,7 +1269,7 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cb49a80..f0f4743 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; + EVT VT = Node->getValueType(ResNo); + + // Stick to the preferred register classes for legal types. + if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { @@ -100,10 +106,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, continue; Match = false; if (User->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; if (i+II.getNumDefs() < II.getNumOperands()) - RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); if (!UseRC) UseRC = RC; else if (RC) { @@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); - + // Figure out the register class to create for the destreg. 
if (VRBase) { DstRC = MRI->getRegClass(VRBase); @@ -173,7 +178,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, } void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && @@ -184,7 +189,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; - const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -237,7 +242,7 @@ unsigned InstrEmitter::getVR(SDValue Op, Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); - // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); @@ -260,7 +265,7 @@ unsigned InstrEmitter::getVR(SDValue Op, void InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && @@ -270,9 +275,9 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); - const TargetInstrDesc &TID = MI->getDesc(); - bool isOptDef = IIOpNum < TID.getNumOperands() && - TID.OpInfo[IIOpNum].isOptionalDef(); + const MCInstrDesc &MCID = MI->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it. @@ -280,8 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) - DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); - assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + DstRC = TII->getRegClass(*II, IIOpNum, TRI); + assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -307,7 +312,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, while (Idx > 0 && MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } @@ -325,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// assertions only. 
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { @@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); - assert((NumOps & 1) == 0 && - "REG_SEQUENCE must have an even number of operands!"); - const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); - for (unsigned i = 0; i != NumOps; ++i) { + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); - if (i & 1) { + if ((i & 1) == 0) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -591,7 +597,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); } // Otherwise, we're going to create an instruction here. - const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { SDNode *Node = SD->getSDNode(); @@ -610,12 +616,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // FIXME: SDDbgValue constants aren't updated with legalization, so it's - // possible to have i128 constants in them at this point. Dwarf writer - // does not handle i128 constants at the moment so, as a crude workaround, - // just drop the debug info if this happens. - if (!CI->getValue().isSignedIntN(64)) - MIB.addReg(0U); + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); else MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { @@ -666,7 +668,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // We want a unique VR for each IMPLICIT_DEF use. return; - const TargetInstrDesc &II = TII->get(Opc); + const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; @@ -695,9 +697,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); else { // Collect declared implicit uses. 
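// After the EmitRegSequence change above, a REG_SEQUENCE node is laid out as
// (rc-index, reg, subreg-index, reg, subreg-index, ...): operand 0 names the
// destination register class, registers sit at odd positions and
// sub-register indices at even positions -- hence the flipped assert and the
// loop starting at operand 1. A sketch of a validator for that shape
// (hypothetical OpKind tags, not SDNode operands):

#include <cstddef>
#include <vector>

enum class OpKind { RegClassIdx, Reg, SubRegIdx };

bool isValidRegSequence(const std::vector<OpKind> &ops) {
  if (ops.size() < 3 || (ops.size() & 1) == 0)
    return false;                         // odd count, at least one pair
  if (ops[0] != OpKind::RegClassIdx)
    return false;                         // operand 0 = class index
  for (std::size_t i = 1; i < ops.size(); ++i) {
    OpKind want = (i & 1) ? OpKind::Reg : OpKind::SubRegIdx;
    if (ops[i] != want)
      return false;
  }
  return true;
}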
- const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); - UsedRegs.append(TID.getImplicitUses(), - TID.getImplicitUses() + TID.getNumImplicitUses()); + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) @@ -849,6 +851,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } break; case InlineAsm::Kind_RegDefEarlyClobber: + case InlineAsm::Kind_Clobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 02c044c..19fc044 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -22,7 +22,7 @@ namespace llvm { -class TargetInstrDesc; +class MCInstrDesc; class SDDbgValue; class InstrEmitter { @@ -49,7 +49,7 @@ class InstrEmitter { unsigned ResNo) const; void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); @@ -63,7 +63,7 @@ class InstrEmitter { /// not in the required register class. void AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); @@ -73,7 +73,7 @@ class InstrEmitter { /// assertions only. void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62d777c..d06e2bd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -58,17 +58,6 @@ class SelectionDAGLegalize { /// against each other, including inserted libcalls. SmallVector<SDValue, 8> LastCALLSEQ; - enum LegalizeAction { - Legal, // The target natively supports this operation. - Promote, // This operation should be executed in a larger type. - Expand // Try to expand this to other ops, otherwise use a libcall. - }; - - /// ValueTypeActions - This is a bitvector that contains two bits for each - /// value type, where the two bits correspond to the LegalizeAction enum. - /// This can be queried with "getTypeAction(VT)". - TargetLowering::ValueTypeActionImpl ValueTypeActions; - /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. @@ -87,25 +76,11 @@ class SelectionDAGLegalize { public: explicit SelectionDAGLegalize(SelectionDAG &DAG); - /// getTypeAction - Return how we should legalize values of this type, either - /// it is already legal or we need to expand it into multiple registers of - /// smaller integer type, or we need to promote it to a larger type. - LegalizeAction getTypeAction(EVT VT) const { - return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT); - } - - /// isTypeLegal - Return true if this type is legal on this target. 
- /// - bool isTypeLegal(EVT VT) const { - return getTypeAction(VT) == Legal; - } - void LegalizeDAG(); private: - /// LegalizeOp - We know that the specified value has a legal type. - /// Recursively ensure that the operands have legal types, then return the - /// result. + /// LegalizeOp - Return a legal replacement for the given operation, with + /// all legal operands. SDValue LegalizeOp(SDValue O); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -220,10 +195,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag), - ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + DAG(dag) { } void SelectionDAGLegalize::LegalizeDAG() { @@ -753,7 +725,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && - getTypeAction(MVT::i32) == Legal) { + TLI.isTypeLegal(MVT::i32)) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); @@ -763,14 +735,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. - if (getTypeAction(MVT::i64) == Legal) { + if (TLI.isTypeLegal(MVT::i64)) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } - if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. @@ -794,10 +766,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } -/// LegalizeOp - We know that the specified value has a legal type, and -/// that its operands are legal. Now ensure that the operation itself -/// is legal, recursively ensuring that the operands' operations remain -/// legal. +/// LegalizeOp - Return a legal replacement for the given operation, with +/// all legal operands. SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
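// The OptimizeFloatStore hunks above now consult TLI.isTypeLegal directly,
// but the transformation is unchanged: a store of an FP constant becomes an
// integer store of its bit pattern. A standalone illustration of the
// f32->i32 and f64->two-i32 cases (memcpy plays the role of ISD::BITCAST):

#include <cstdint>
#include <cstring>

void storeF32Bits(float v, uint32_t *mem) {
  uint32_t bits;
  std::memcpy(&bits, &v, sizeof bits);  // bitcast f32 -> i32
  mem[0] = bits;                        // one legal i32 store
}

void storeF64AsTwoI32(double v, uint32_t *mem, bool bigEndian) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof bits);  // bitcast f64 -> i64
  uint32_t lo = uint32_t(bits), hi = uint32_t(bits >> 32);
  mem[0] = bigEndian ? hi : lo;         // halves swapped on big-endian
  mem[1] = bigEndian ? lo : hi;
}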
return Op; @@ -806,11 +776,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(getTypeAction(Node->getValueType(i)) == Legal && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((TLI.getTypeAction(*DAG.getContext(), + Node->getOperand(i).getValueType()) == + TargetLowering::TypeLegal || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -1354,7 +1327,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), @@ -1374,6 +1347,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } + + // If this is a promoted vector load, and the vector element types are + // legal, then scalarize it. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && + TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0).getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + Node->getValueType(0), &LoadVals[0], LoadVals.size()); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + } + + // If this is a promoted vector load, and the vector element types are + // illegal, create the promoted vector from bitcasted segments. 
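// The first new EXTLOAD path above handles a promoted vector load whose
// result element type is legal: one extending scalar load per element,
// chains joined by a TokenFactor, result rebuilt with BUILD_VECTOR. The
// same shape as plain code, for a <4 x i8> zero-extended to <4 x i32>
// (standalone sketch, not the DAG API):

#include <cstdint>
#include <vector>

std::vector<uint32_t> extLoadV4I8ToV4I32(const uint8_t *mem) {
  const unsigned numElem = 4;
  const unsigned stride = sizeof(uint8_t);       // source element size, bytes
  std::vector<uint32_t> vals;
  vals.reserve(numElem);
  for (unsigned idx = 0; idx != numElem; ++idx)
    vals.push_back(uint32_t(mem[idx * stride])); // widening scalar load
  return vals;                                   // the BUILD_VECTOR step
}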
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { + EVT MemElemTy = Node->getValueType(0).getScalarType(); + EVT SrcSclrTy = SrcVT.getScalarType(); + unsigned SizeRatio = + (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + SrcVT.getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + if (TLI.isBigEndian()) { + // MSB (which is garbage, comes first) + LoadVals.push_back(ScalarLoad.getValue(0)); + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + } else { + // LSB (which is data, comes first) + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + LoadVals.push_back(ScalarLoad.getValue(0)); + } + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NumElem*SizeRatio); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + TempWideVector, &LoadVals[0], LoadVals.size()); + + // Cast to the correct type + ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + + } + // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, // whether they are legal or not, and then we end up here without any @@ -1548,9 +1606,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case TargetLowering::Custom: Result = TLI.LowerOperation(Result, DAG); break; - case Expand: + case TargetLowering::Expand: + + EVT WideScalarVT = Tmp3.getValueType().getScalarType(); + EVT NarrowScalarVT = StVT.getScalarType(); + + // The Store type is illegal, must scalarize the vector store. + SmallVector<SDValue, 8> Stores; + bool ScalarLegal = TLI.isTypeLegal(WideScalarVT); + if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + unsigned NumElem = StVT.getVectorNumElements(); + + unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + // Types smaller than 8 bits are promoted to 8 bits. 
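// In the second path above the result element type is itself illegal, so
// each narrow load is padded out to a SizeRatio-wide group of slots (the
// padding is UNDEF) and the assembled wide vector is bitcast back to the
// requested type. The slot the loaded data occupies, in isolation --
// mirroring the push order in the code above:

#include <vector>

std::vector<bool> dataSlotsInGroup(unsigned sizeRatio, bool bigEndian) {
  std::vector<bool> isData(sizeRatio, false);
  // Big-endian pushes the loaded value first; little-endian pushes the
  // undef padding first and the value last.
  isData[bigEndian ? 0 : sizeRatio - 1] = true;
  return isData;
}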
+ ScalarSize = std::max<unsigned>(ScalarSize, 8); + // Store stride + unsigned Stride = ScalarSize/8; + assert(isPowerOf2_32(Stride) && "Stride must be a power of two"); + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + + + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); + + Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // The Store type is illegal, must scalarize the vector store. + // However, the scalar type is illegal. Must bitcast the result + // and store it in smaller parts. + if (!TLI.isTypeLegal(StVT) && StVT.isVector()) { + unsigned WideNumElem = StVT.getVectorNumElements(); + unsigned Stride = NarrowScalarVT.getSizeInBits()/8; + + unsigned SizeRatio = + (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits()); + + EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT, + SizeRatio*WideNumElem); + + // Cast the wide elem vector to wider vec with smaller elem type. + // Example <2 x i64> -> <4 x i32> + Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); + + for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) { + // Extract elment i + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + // bump pointer. + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + + // Store if, this element is: + // - First element on big endian, or + // - Last element on little endian + if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) || + ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) { + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // TRUNCSTORE:i16 i32 -> STORE i16 - assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); + assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); @@ -1709,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue SignBit; EVT FloatVT = Tmp2.getValueType(); EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); - if (isTypeLegal(IVT)) { + if (TLI.isTypeLegal(IVT)) { // Convert to an integer with the same sign bit. 
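// The store-expansion paths above scalarize an illegal vector store. Odd
// element widths are first rounded up so every piece is a byte-addressable
// power of two and the per-element stride is exact. The rounding rule on
// its own:

#include <algorithm>

bool isPow2(unsigned x) { return x != 0 && (x & (x - 1)) == 0; }

unsigned nextPow2(unsigned x) {
  unsigned p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

// Width in bits used to store one element: a power of two, at least one
// byte, so stride = storeScalarSize(bits) / 8 divides evenly.
unsigned storeScalarSize(unsigned scalarBits) {
  if (!isPow2(scalarBits))
    scalarBits = nextPow2(scalarBits);
  return std::max(scalarBits, 8u);
}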
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { @@ -3031,7 +3171,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (getTypeAction(EltVT) == Promote) + if (!TLI.isTypeLegal(EltVT)) EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; @@ -3184,6 +3324,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_PPCF128)); break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27a466b..e6835d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; @@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { NVT, &Op, 1, false, N->getDebugLoc()); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; @@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 
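// SoftenFloatRes_FMA above packages the three softened operands into one
// RTLIB::FMA_* call; ExpandFloatRes_FMA does the same for expanded types.
// On a soft-float target the net effect is simply a call into the runtime's
// fma family:

#include <cmath>

float softenedFMA(float a, float b, float c) {
  return std::fma(a, b, c);   // one rounding for a*b + c, via fmaf
}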
b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b8da57f..e7c77dd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -191,10 +192,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); - if (NInVT.isVector()) - // Promote vector element via memory load/store. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - CreateStackStoreLoad(InOp, OutVT)); break; case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. @@ -204,8 +201,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { break; case TargetLowering::TypeScalarizeVector: // Convert the element to an integer and promote it by hand. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - BitConvertToInteger(GetScalarizedVector(InOp))); + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; case TargetLowering::TypeSplitVector: { // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. @@ -339,8 +338,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? 
- ISD::AssertZext : ISD::AssertSext, dl, - NVT, Res, DAG.getValueType(N->getValueType(0))); + ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + DAG.getValueType(N->getValueType(0).getScalarType())); } SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { @@ -370,7 +369,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, DAG.getValueType(N->getOperand(0).getValueType())); if (N->getOpcode() == ISD::ZERO_EXTEND) - return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Res, dl, + N->getOperand(0).getValueType().getScalarType()); assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); return Res; } @@ -520,20 +520,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); - switch (getTypeAction(N->getOperand(0).getValueType())) { + switch (getTypeAction(InOp.getValueType())) { default: llvm_unreachable("Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypeExpandInteger: - Res = N->getOperand(0); + Res = InOp; break; case TargetLowering::TypePromoteInteger: - Res = GetPromotedInteger(N->getOperand(0)); + Res = GetPromotedInteger(InOp); break; + case TargetLowering::TypeSplitVector: + EVT InVT = InOp.getValueType(); + assert(InVT.isVector() && "Cannot split scalar types"); + unsigned NumElts = InVT.getVectorNumElements(); + assert(NumElts == NVT.getVectorNumElements() && + "Dst and Src must have the same number of elements"); + EVT EltVT = InVT.getScalarType(); + assert(isPowerOf2_32(NumElts) && + "Promoted vector type must be a power of two"); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2); + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), + NumElts/2); + + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, + DAG.getIntPtrConstant(0)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, + DAG.getIntPtrConstant(NumElts/2)); + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); } // Truncate to NVT instead of VT - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); } SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { @@ -970,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); } @@ -1069,6 +1094,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
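// The new TypeSplitVector case in PromoteIntRes_TRUNCATE above splits the
// input vector in two, truncates each half to the half-sized promoted type,
// and concatenates the results. The same shape on plain containers (assumes
// a power-of-two element count, as the assert does):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint8_t> truncateByHalves(const std::vector<uint32_t> &in) {
  std::size_t half = in.size() / 2;
  std::vector<uint8_t> lo, hi;
  for (std::size_t i = 0; i != half; ++i)
    lo.push_back(uint8_t(in[i]));            // TRUNCATE of EXTRACT_SUBVECTOR 0
  for (std::size_t i = half; i != in.size(); ++i)
    hi.push_back(uint8_t(in[i]));            // TRUNCATE of EXTRACT_SUBVECTOR half
  lo.insert(lo.end(), hi.begin(), hi.end()); // CONCAT_VECTORS
  return lo;
}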
@@ -2146,6 +2173,86 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + const Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + DebugLoc dl = N->getDebugLoc(); + + // A divide for UMULO should be faster than a function call. + if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + RHS, DAG.getConstant(0, VT), ISD::SETNE); + SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, + DAG.getConstant(1, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); + SDValue Overflow; + Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + + // Replace this with a libcall that will check overflow. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::MULO_I32; + else if (VT == MVT::i64) + LC = RTLIB::MULO_I64; + else if (VT == MVT::i128) + LC = RTLIB::MULO_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + SDValue Temp = DAG.CreateStackTemporary(PtrVT); + // Temporary for the overflow value, default it to zero. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + } + + // Also pass the address of the overflow check. + Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(Chain, RetTy, true, false, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + true, Func, Args, DAG, dl); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, + MachinePointerInfo(), false, false, 0); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. 
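// For UMULO the expansion above skips the libcall: multiply, then divide the
// product by a guarded non-zero divisor and flag overflow when the quotient
// disagrees with the LHS. (Note that the SELECT's condition is built with
// SETNE, so the "isZero" value above is true exactly when RHS is non-zero.)
// The intended check as scalar code:

#include <cstdint>

// prod receives the wrapped product; the return value is UMULO's flag.
bool umulo64(uint64_t lhs, uint64_t rhs, uint64_t &prod) {
  prod = lhs * rhs;                  // ISD::MUL, wraps on overflow
  uint64_t divisor = rhs ? rhs : 1;  // the SELECT guarding the UDIV
  return rhs != 0 && prod / divisor != lhs;
}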
+ ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2638,18 +2745,18 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - unsigned OutNumElems = N->getValueType(0).getVectorNumElements(); + unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue BaseIdx = N->getOperand(1); SmallVector<SDValue, 8> Ops; + Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { // Extract the element from the original vector. @@ -2681,18 +2788,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); - EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0,V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { - - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); @@ -2702,6 +2804,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SmallVector<SDValue, 8> Ops; + Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); Ops.push_back(Op); @@ -2714,10 +2817,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - assert(!InVT.isVector() && "Input must not be a scalar"); + assert(!N->getOperand(0).getValueType().isVector() && + "Input must be a scalar"); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2730,12 +2831,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { - - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT InElVT = InVT.getVectorElementType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); @@ -2744,7 +2839,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0); + SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(1)); diff --git 
a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b2f966b..952797d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -201,7 +201,7 @@ private: EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } // Integer Result Promotion. @@ -318,6 +318,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -377,6 +378,7 @@ private: SDValue SoftenFloatRes_FLOG(SDNode *N); SDValue SoftenFloatRes_FLOG2(SDNode *N); SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); @@ -441,6 +443,7 @@ private: void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5d0f923..ffff10c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -182,9 +182,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; - case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9595f69..b5698f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2164,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (MemVT.getSizeInBits() <= WidenEltWidth) break; if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2179,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 7b560d1..b275c63 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -249,14 +249,14 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 
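// The two FindMemType hunks above add one acceptance condition: besides
// dividing the widened width evenly, a candidate memory type must split it
// into a power-of-two number of pieces, so the pieces recombine by repeated
// doubling. As a standalone predicate:

bool usableMemType(unsigned widenBits, unsigned memBits) {
  if (memBits == 0 || widenBits % memBits != 0)
    return false;                        // must divide evenly
  unsigned pieces = widenBits / memBits;
  return (pieces & (pieces - 1)) == 0;   // isPowerOf2_32(pieces)
}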
&& "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; // LoadNode may already exist. This can happen when there is another @@ -422,10 +422,10 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -490,7 +490,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -504,10 +505,10 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8d61a89..12b1838 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -276,6 +276,43 @@ private: }; } // end anonymous namespace +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost) { + EVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. 
+ if (VT == MVT::untyped) { + const SDNode *Node = RegDefPos.GetNode(); + unsigned Opcode = Node->getMachineOpcode(); + + if (Opcode == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Idx = RegDefPos.GetIdx(); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { @@ -800,14 +837,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); @@ -987,10 +1024,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1055,7 +1092,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. 
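// GetCostForDef above centralizes the class/cost lookup; only MVT::untyped
// values need the opcode inspection, and their cost is pinned to 1 for lack
// of anything better. The dispatch reduced to a skeleton (toy inputs in
// place of the TRI/TII/TLI queries):

#include <utility>

enum class DefKind { UntypedRegSequence, UntypedOther, Typed };

std::pair<unsigned, unsigned>   // {register class id, cost}
costForDef(DefKind k, unsigned rcFromOperand0, unsigned rcFromOpInfo,
           unsigned repClass, unsigned repCost) {
  switch (k) {
  case DefKind::UntypedRegSequence:
    return {rcFromOperand0, 1};   // class index carried in operand 0
  case DefKind::UntypedOther:
    return {rcFromOpInfo, 1};     // cost arbitrarily 1 (the FIXME above)
  case DefKind::Typed:
    return {repClass, repCost};   // representative class for the VT
  }
  return {0, 0};                  // unreachable
}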
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -1070,10 +1108,10 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1369,6 +1407,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. + return SortFunc(right, left); + } +}; +#endif // NDEBUG + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { @@ -1569,20 +1622,33 @@ protected: }; template<class SF> -class RegReductionPriorityQueue : public RegReductionPQBase { - static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), - E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Q.end())) - std::swap(*Best, Q.back()); - Q.pop_back(); - return V; +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; public: @@ -1603,7 +1669,7 @@ public: SUnit *pop() { if (Queue.empty()) return NULL; - SUnit *V = popFromQueue(Queue, Picker); + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; return V; } @@ -1613,7 +1679,7 @@ public: std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { - SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); if (isBottomUp()) dbgs() << "Height " << SU->getHeight() << ": "; else @@ -1778,9 +1844,9 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { } for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if ((RegPressure[RCId] + Cost) >= 
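// The debug-only reverse_sort above stress-tests the scheduler by handing
// popFromQueue a picker with its arguments swapped, so the queue returns
// what it otherwise considers the worst candidate. The same trick on a
// plain vector-backed queue (model types, not SUnits):

#include <iterator>
#include <utility>
#include <vector>

template <class Cmp> struct ReversedPicker {
  Cmp &inner;
  explicit ReversedPicker(Cmp &c) : inner(c) {}
  bool operator()(int a, int b) const { return inner(b, a); } // swap args
};

// Same linear scan as popFromQueueImpl; q must be non-empty.
template <class Cmp> int popBest(std::vector<int> &q, Cmp picker) {
  auto best = q.begin();
  for (auto i = std::next(q.begin()); i != q.end(); ++i)
    if (picker(*best, *i))      // picker prefers *i over the current best
      best = i;
  int v = *best;
  std::swap(*best, q.back());   // constant-time removal
  q.pop_back();
  return v;
}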
RegLimit[RCId]) return true; } @@ -1891,9 +1957,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + RegPressure[RCId] += Cost; break; } } @@ -1906,16 +1973,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs > 0) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= Cost; } } dumpRegPressure(); @@ -1962,13 +2029,9 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { unsigned POpc = PN->getMachineOpcode(); if (POpc == TargetOpcode::IMPLICIT_DEF) continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { + if (POpc == TargetOpcode::EXTRACT_SUBREG || + POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); @@ -2543,11 +2606,11 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned i = 0; i != NumOps; ++i) { - if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { SDNode *DU = SU->getNode()->getOperand(i).getNode(); if (DU->getNodeId() != -1 && Op->OrigNode == &(*SUnits)[DU->getNodeId()]) @@ -2727,11 +2790,11 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned j = 0; j != NumOps; ++j) { - if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) + if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) continue; SDNode *DU = SU->getNode()->getOperand(j).getNode(); if (DU->getNodeId() == -1) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp 
b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 9f2f012..71f07d6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,11 +17,12 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -111,7 +112,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, unsigned ResNo = User->getOperand(2).getResNo(); if (Def->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; @@ -255,8 +256,8 @@ void ScheduleDAGSDNodes::ClusterNodes() { continue; unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (TID.mayLoad()) + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.mayLoad()) // Cluster loads from "near" addresses into combined SUnits. ClusterNeighboringLoads(Node); } @@ -378,7 +379,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { - const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -390,14 +391,14 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) SU->isCommutable = true; } @@ -435,7 +436,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. - if (Cost >= 0) + if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. @@ -520,14 +521,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { for (;DefIdx < NodeNumDefs; ++DefIdx) { if (!Node->hasAnyUseOfValue(DefIdx)) continue; - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { - // Propagate the incoming (full-register) type. I doubt it's needed. - ValueType = Node->getOperand(0).getValueType(); - } - else { - ValueType = Node->getValueType(DefIdx); - } + ValueType = Node->getValueType(DefIdx); ++DefIdx; return; // Found a normal regdef. } @@ -649,7 +643,7 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // order number right after the N. 
MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); + ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N); for (unsigned i = 0, e = DVs.size(); i != e; ++i) { if (DVs[i]->isInvalidated()) continue; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b5f68f3..9c27b2e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -135,6 +135,14 @@ namespace llvm { return ValueType; } + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx-1; + } + void Advance(); private: void InitNodeNumDefs(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 68eeb60..35ea0bb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -598,7 +598,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) { Ordering->remove(N); // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. - SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N); + ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) DbgVals[i]->setIsInvalidated(); } @@ -3326,13 +3326,13 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && "Expecting memcpy / memset source to meet alignment requirement!"); - // If 'SrcAlign' is zero, that means the memory operation does not need load - // the value, i.e. memset or memcpy from constant string. Otherwise, it's - // the inferred alignment of the source. 'DstAlign', on the other hand, is the - // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. 'MemcpyStrSrc' - // indicates whether the memcpy source is constant so it does not need to be - // loaded. + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. 
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, NonScalarIntSafe, MemcpyStrSrc, DAG.getMachineFunction()); @@ -4037,6 +4037,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4142,6 +4144,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4165,6 +4169,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4191,6 +4197,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4216,6 +4224,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); @@ -5508,9 +5518,9 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { return; SDNode *FromNode = From.getNode(); SDNode *ToNode = To.getNode(); - SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); + ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode); SmallVector<SDDbgValue *, 2> ClonedDVs; - for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); + for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); I != E; ++I) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { @@ -5691,24 +5701,39 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, return false; } -/// isPredecessorOf - Return true if this node is a predecessor of N. This node -/// is either an operand of N or it can be reached by traversing up the operands. -/// NOTE: this is an expensive method. Use it carefully. -bool SDNode::isPredecessorOf(SDNode *N) const { - SmallPtrSet<SDNode *, 32> Visited; - SmallVector<SDNode *, 16> Worklist; - Worklist.push_back(N); +/// hasPredecessor - Return true if N is a predecessor of this node. +/// N is either an operand of this node, or can be reached by recursively +/// traversing up the operands. +/// NOTE: This is an expensive method. Use it carefully. 
+bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + return hasPredecessorHelper(N, Visited, Worklist); +} - do { - N = Worklist.pop_back_val(); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *Op = N->getOperand(i).getNode(); - if (Op == this) - return true; +bool SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVector<const SDNode *, 16> &Worklist) const { + if (Visited.empty()) { + Worklist.push_back(this); + } else { + // Take a look in the visited set. If we've already encountered this node + // we needn't search further. + if (Visited.count(N)) + return true; + } + + // Haven't visited N yet. Continue the search. + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + SDNode *Op = M->getOperand(i).getNode(); if (Visited.insert(Op)) Worklist.push_back(Op); + if (Op == N) + return true; } - } while (!Worklist.empty()); + } return false; } @@ -5863,6 +5888,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSUB: return "fsub"; case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7a8a975..81b03ee 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -286,22 +286,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - unsigned NumElts = ValueVT.getVectorNumElements(); - SmallVector<SDValue, 8> NewOps; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); - SDValue Cast; + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); - bool Smaller = ValueVT.bitsLE(PartVT); - - Cast = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Ext); - - NewOps.push_back(Cast); - } - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, - &NewOps[0], NewOps.size()); } // Trivial bitcast if the types are the same size and the destination @@ -310,9 +298,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -453,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. 
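The "promoted vector extract" rewrite above (and its mirror in getCopyToPartsVector in the next hunk) relies on ISD::TRUNCATE and ISD::ANY_EXTEND being defined element-wise on vector types, so the per-element loop was never necessary:

    // Copying a <4 x i32> part into a <4 x i16> value previously took nine
    // nodes:
    //   e0..e3 = extract_vector_elt part, 0..3
    //   t0..t3 = truncate e0..e3 to i16
    //   value  = build_vector t0..t3
    // After this change it is a single node, because vector TRUNCATE /
    // ANY_EXTEND operate lane by lane:
    //   value  = truncate <4 x i32> part to <4 x i16>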
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in @@ -475,28 +471,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && PartVT.getVectorElementType().bitsGE( - ValueVT.getVectorElementType())&& + ValueVT.getVectorElementType()) && PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - unsigned NumElts = ValueVT.getVectorNumElements(); - SmallVector<SDValue, 8> NewOps; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); - SDValue Cast = DAG.getNode(ISD::ANY_EXTEND, - DL, PartVT.getScalarType(), Ext); - NewOps.push_back(Cast); - } - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, - &NewOps[0], NewOps.size()); + bool Smaller = PartVT.bitsLE(ValueVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } else{ // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -1280,6 +1271,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); + BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + uint32_t weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1549,8 +1558,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB); + addSuccessorWithWeight(SwitchBB, CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
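A design note on the weight plumbing defined above, condensed from the two helpers themselves (no new API):

    //   uint32_t weight = BPI ? BPI->getEdgeWeight(SrcBB, DstBB) : 0;
    //   Src->addSuccessor(Dst, weight);   // weight 0 == "no information"

FuncInfo.BPI is only set when branch-probability analysis is enabled (see the UseMBPI flag added to SelectionDAGISel.cpp later in this diff), so in the fast-isel/-O0 configuration getEdgeWeight returns 0 and every addSuccessorWithWeight call behaves like the plain addSuccessor calls it replaces throughout the switch-lowering hunks that follow.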
@@ -1694,8 +1703,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1718,7 +1727,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; - if (CountPopulation_64(B.Mask) == 1) { + unsigned PopCount = CountPopulation_64(B.Mask); + if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), @@ -1726,6 +1736,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ShiftOp, DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, @@ -1740,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1981,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -2009,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -2428,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2489,6 +2509,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + // Turn exact SDivs into multiplications. + // FIXME: This should be in DAGCombiner, but it doesn't have access to the + // exact bit. 
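Two annotations for the hunks around this point. First, a worked example of the new single-zero-bit case in visitBitTestCase above:

    // Suppose the mask for this bit test is 0b11011: every value in the
    // tested range matches except 2.  CountTrailingOnes_64(0b11011) == 2 is
    // exactly the index of the lone zero bit, so the lowering emits
    //   setcc ShiftOp, 2, setne
    // directly -- one comparison in place of the general path's shift
    // (1 << ShiftOp), AND with the mask, and compare against zero.

Second, on the visitSDiv condition that follows: the exact path fires only when the IR division carries the `exact` flag (the source guarantees there is no remainder), the divisor is a non-zero constant (so a multiplicative inverse can be computed at compile time), and the dividend is not itself constant (constant/constant cases fold elsewhere). The rewrite itself, BuildExactSDIV, is added to TargetLowering.cpp later in this diff; a worked example follows that hunk.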
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && + !isa<ConstantSDNode>(Op1) && + isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) + setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + else + setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + Op1, Op2)); +} + void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) @@ -2855,7 +2891,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2895,7 +2931,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -4623,6 +4659,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -4759,6 +4802,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); + return 0; + } + case Intrinsic::trap: { StringRef TrapFuncName = getTrapFunctionName(); if (TrapFuncName.empty()) { @@ -4789,15 +4839,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -5415,54 +5466,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; } // end anonymous namespace -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. 
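The new Intrinsic::fma case above maps straight onto the ISD::FMA opcode added earlier in this diff, with the fmaf/fma/fmal libcall names registered in TargetLowering.cpp below as the fallback. What "fused" buys is a single rounding; a small self-contained demonstration using standard C++ std::fma (illustration only, unrelated to LLVM's own sources):

    #include <cmath>
    #include <cstdio>

    int main() {
      // a*b equals 1 - 2^-104 exactly, which rounds to 1.0 in double, so
      // the separately rounded multiply-add loses the entire result.
      double a = 1.0 + std::ldexp(1.0, -52);
      double b = 1.0 - std::ldexp(1.0, -52);
      double c = -1.0;
      std::printf("mul+add: %a\n", a * b + c);         // 0x0p+0 (all bits lost)
      std::printf("fma:     %a\n", std::fma(a, b, c)); // -0x1p-104 (exact)
      return 0;
    }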
-static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. - if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -5597,52 +5600,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, return; } - // This is a reference to a register class that doesn't directly correspond - // to an LLVM register class. Allocate NumRegs consecutive, available, - // registers from the class. - std::vector<unsigned> RegClassRegs - = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - unsigned NumAllocated = 0; - for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { - unsigned Reg = RegClassRegs[i]; - // See if this register is available. - if ((isOutReg && OutputRegs.count(Reg)) || // Already used. - (isInReg && InputRegs.count(Reg))) { // Already used. - // Make sure we find consecutive registers. - NumAllocated = 0; - continue; - } - - // Check to see if this register is allocatable (i.e. don't give out the - // stack pointer). - const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); - if (!RC) { // Couldn't allocate this register. - // Reset NumAllocated to make sure we return consecutive registers. - NumAllocated = 0; - continue; - } - - // Okay, this register is good, we can use it. - ++NumAllocated; - - // If we allocated enough consecutive registers, succeed. - if (NumAllocated == NumRegs) { - unsigned RegStart = (i-NumAllocated)+1; - unsigned RegEnd = i+1; - // Mark all of the allocated registers used. 
- for (unsigned i = RegStart; i != RegEnd; ++i) - Regs.push_back(RegClassRegs[i]); - - OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), - OpInfo.ConstraintVT); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); - return; - } - } - // Otherwise, we couldn't allocate enough registers for this. } @@ -5749,10 +5706,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -6015,8 +5975,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) + if (OpInfo.AssignedRegs.Regs.empty()) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -6031,8 +5990,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands( - InlineAsm::Kind_RegDefEarlyClobber, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, DAG, AsmNodeOperands); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8376d41..a0884eb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -434,6 +434,9 @@ private: const Value* SV, MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); + + uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -464,7 +467,7 @@ private: void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } - void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitSDiv(const User &I); void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } void visitAnd (const User &I) { visitBinary(I, ISD::AND); } void visitOr (const User &I) { visitBinary(I, ISD::OR); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 771b008..87bb296 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DebugInfo.h" 
#include "llvm/Constants.h" #include "llvm/Function.h" @@ -68,6 +69,11 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + #ifndef NDEBUG static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, @@ -186,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -199,6 +206,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -262,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF); FuncInfo->set(Fn, *MF); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + else + FuncInfo->BPI = 0; + SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -339,9 +354,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const MachineBasicBlock *MBB = I; for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - if ((TID.isCall() && !TID.isReturn()) || + if ((MCID.isCall() && !MCID.isReturn()) || II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; @@ -666,7 +681,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); - const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); @@ -2596,9 +2611,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. 
- const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); - bool mayLoad = TID.mayLoad(); - bool mayStore = TID.mayStore(); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = MCID.mayLoad(); + bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; for (SmallVector<MachineMemOperand*, 2>::const_iterator I = diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cf6069a..2626ac3 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::MUL_I32] = "__mulsi3"; Names[RTLIB::MUL_I64] = "__muldi3"; Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; Names[RTLIB::SDIV_I8] = "__divqi3"; Names[RTLIB::SDIV_I16] = "__divhi3"; Names[RTLIB::SDIV_I32] = "__divsi3"; @@ -136,6 +139,10 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::REM_F64] = "fmod"; Names[RTLIB::REM_F80] = "fmodl"; Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; Names[RTLIB::POWI_F32] = "__powisf2"; Names[RTLIB::POWI_F64] = "__powidf2"; Names[RTLIB::POWI_F80] = "__powixf2"; @@ -673,10 +680,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -965,8 +978,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1762,9 +1781,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. 
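The NextPowerOf2 rounding added to both vector-breakdown functions above fixes the register count for oddly sized element types; a worked instance:

    // Take a <2 x i33> value on a target whose widest legal integer is i32.
    // NewVT = i33 and RegisterVT = i32, so the value is expanded, and:
    //   old count: NumVectorRegs * (33 / 32) = 2 * 1 = 2   (under-counted)
    //   new count: isPowerOf2_32(33) is false, so
    //              NewVTSize = NextPowerOf2(33) = 64, and
    //              NumVectorRegs * (64 / 32) = 2 * 2 = 4   registers.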
- if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && - Op.getOperand(0).getValueType().isFloatingPoint() && - !Op.getOperand(0).getValueType().isVector()) { + if (!Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { @@ -1902,7 +1921,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // comparisons. if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -2608,7 +2627,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetLowering::ConstraintType TargetLowering::getConstraintType(const std::string &Constraint) const { - // FIXME: lots more standard ones to handle. if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -2661,9 +2679,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { - + if (Constraint.length() > 1) return; - + char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; @@ -2722,13 +2740,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::vector<unsigned> TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - return std::vector<unsigned>(); -} - - std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -2853,7 +2864,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } - + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. if (const StructType *STy = dyn_cast<StructType>(OpTy)) if (STy->getNumElements() == 1) @@ -2955,10 +2966,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -3204,6 +3218,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } +/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication +/// with the multiplicative inverse of the constant. +SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, + SelectionDAG &DAG) const { + ConstantSDNode *C = cast<ConstantSDNode>(Op2); + APInt d = C->getAPIntValue(); + assert(d != 0 && "Division by zero!"); + + // Shift the value upfront if it is even, so the LSB is one. 
+ unsigned ShAmt = d.countTrailingZeros(); + if (ShAmt) { + // TODO: For UDIV use SRL instead of SRA. + SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + d = d.ashr(ShAmt); + } + + // Calculate the multiplicative inverse, using Newton's method. + APInt t, xn = d; + while ((t = d*xn) != 1) + xn *= APInt(d.getBitWidth(), 2) - t; + + Op2 = DAG.getConstant(xn, Op1.getValueType()); + return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); +} + /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 6ab0cb0..5a253a4 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -45,7 +45,8 @@ namespace { /// StackEntryTy - Abstract type of a link in the shadow stack. /// - const StructType *StackEntryTy; + StructType *StackEntryTy; + StructType *FrameMapTy; /// Roots - GC roots in the current function. Each is a pair of the /// intrinsic call and its corresponding alloca. @@ -164,8 +165,7 @@ namespace { InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, - Args.begin(), Args.end(), - CI->getName(), CallBB); + Args, CI->getName(), CallBB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); CI->replaceAllUsesWith(II); @@ -194,31 +194,31 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { // Truncate the ShadowStackDescriptor if some metadata is null. unsigned NumMeta = 0; - SmallVector<Constant*,16> Metadata; + SmallVector<Constant*, 16> Metadata; for (unsigned I = 0; I != Roots.size(); ++I) { Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); if (!C->isNullValue()) NumMeta = I + 1; Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); } + Metadata.resize(NumMeta); + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Constant *BaseElts[] = { - ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), - ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), }; Constant *DescriptorElts[] = { - ConstantStruct::get(F.getContext(), BaseElts, 2, false), - ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), - Metadata.begin(), NumMeta) + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) }; - Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, - false); - - std::string TypeName("gc_map."); - TypeName += utostr(NumMeta); - F.getParent()->addTypeName(TypeName, FrameMap->getType()); + Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; + StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems // that, short of multithreaded LLVM, it should be safe; all that is @@ -246,17 +246,12 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { // doInitialization creates the generic version of this type. 
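BuildExactSDIV above leans on two number-theoretic facts: an sdiv marked exact has no remainder, so dividing by d is the same as multiplying by d's inverse modulo 2^N; and every odd d has such an inverse, computable by the Newton iteration in the loop (each step doubles the number of correct low bits, so a 32-bit inverse converges in at most four steps from the seed x = d, which is already correct mod 8). Even divisors are first reduced to odd ones by the arithmetic shift. A self-contained demonstration in plain C++ (illustration only, hypothetical helper name):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Inverse of an odd d modulo 2^32 via the same iteration BuildExactSDIV
    // runs on APInt:  x <- x * (2 - d*x).
    uint32_t inverseMod2_32(uint32_t d) {
      assert((d & 1) && "only odd values are invertible mod 2^32");
      uint32_t x = d;          // Seed: d*d == 1 (mod 8) for every odd d.
      while (d * x != 1)
        x *= 2 - d * x;        // Doubles the number of correct low bits.
      return x;
    }

    int main() {
      // An exact division n/7 becomes one multiplication: since 7 divides
      // n with no remainder, n * inverse(7) == n/7 (mod 2^32).
      uint32_t inv7 = inverseMod2_32(7);   // 0xb6db6db7
      uint32_t n = 7u * 123456u;
      std::printf("%u\n", n * inv7);       // prints 123456
      return 0;
    }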
- std::vector<const Type*> EltTys; + std::vector<Type*> EltTys; EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - Type *Ty = StructType::get(F.getContext(), EltTys); - - std::string TypeName("gc_stackentry."); - TypeName += F.getName(); - F.getParent()->addTypeName(TypeName, Ty); - - return Ty; + + return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys); } /// doInitialization - If this module uses the GC intrinsics, find them now. If @@ -267,13 +262,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. // void *Meta[]; // May be absent for roots without metadata. // }; - std::vector<const Type*> EltTys; + std::vector<Type*> EltTys; // 32 bits is ok up to a 32GB stack frame. :) EltTys.push_back(Type::getInt32Ty(M.getContext())); // Specifies length of variable length array. EltTys.push_back(Type::getInt32Ty(M.getContext())); - StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); - M.addTypeName("gc_map", FrameMapTy); + FrameMapTy = StructType::createNamed("gc_map", EltTys); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); // struct StackEntry { @@ -281,18 +275,14 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // FrameMap *Map; // Pointer to constant FrameMap. // void *Roots[]; // Stack roots (in-place array, so we pretend). // }; - OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); - + + StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry"); + EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); EltTys.push_back(FrameMapPtrTy); - PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); - - RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); - StackEntryTy = cast<StructType>(LinkTyH.get()); + StackEntryTy->setBody(EltTys); const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); - M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from - // a FunctionPass? // Get the root chain if it already exists. Head = M.getGlobalVariable("llvm_gc_root_chain"); @@ -399,7 +389,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) { Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, 0,1,"gc_frame.map"); - AtEntry.CreateStore(FrameMap, EntryMapPtr); + AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp deleted file mode 100644 index 221bec5..0000000 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ /dev/null @@ -1,1539 +0,0 @@ -//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple register coalescing pass that attempts to -// aggressively coalesce every register copy that it can. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regcoalescing" -#include "SimpleRegisterCoalescing.h" -#include "VirtRegMap.h" -#include "LiveDebugVariables.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include <algorithm> -#include <cmath> -using namespace llvm; - -STATISTIC(numJoins , "Number of interval joins performed"); -STATISTIC(numCrossRCs , "Number of cross class joins performed"); -STATISTIC(numCommutes , "Number of instruction commuting performed"); -STATISTIC(numExtends , "Number of copies extended"); -STATISTIC(NumReMats , "Number of instructions re-materialized"); -STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); -STATISTIC(numAborts , "Number of times interval joining aborted"); - -char SimpleRegisterCoalescing::ID = 0; -static cl::opt<bool> -EnableJoining("join-liveintervals", - cl::desc("Coalesce copies (default=true)"), - cl::init(true)); - -static cl::opt<bool> -DisableCrossClassJoin("disable-cross-class-join", - cl::desc("Avoid coalescing cross register class copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -EnablePhysicalJoin("join-physregs", - cl::desc("Join physical register copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -VerifyCoalescing("verify-coalescing", - cl::desc("Verify machine instrs before and after register coalescing"), - cl::Hidden); - -INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer, - "simple-register-coalescing", "Simple Register Coalescing", - false, false, true) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer, - "simple-register-coalescing", "Simple Register Coalescing", - false, false, true) - -char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; - -void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addPreservedID(TwoAddressInstructionPassID); - MachineFunctionPass::getAnalysisUsage(AU); -} - -void 
SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) { - /// Joined copies are not deleted immediately, but kept in JoinedCopies. - JoinedCopies.insert(CopyMI); - - /// Mark all register operands of CopyMI as <undef> so they won't affect dead - /// code elimination. - for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), - E = CopyMI->operands_end(); I != E; ++I) - if (I->isReg()) - I->setIsUndef(true); -} - -/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA -/// being the source and IntB being the dest, thus this defines a value number -/// in IntB. If the source value number (in IntA) is defined by a copy from B, -/// see if we can merge these two pieces of B into a single value number, -/// eliminating a copy. For example: -/// -/// A3 = B0 -/// ... -/// B1 = A3 <- this copy -/// -/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 -/// value number to be replaced with B0 (which simplifies the B liveinterval). -/// -/// This returns true if an interval was modified. -/// -bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // Bail if there is no dst interval - can happen when merging physical subreg - // operations. - if (!li_->hasInterval(CP.getDstReg())) - return false; - - LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - - // BValNo is a value number in B that is defined by a copy from A. 'B3' in - // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; - - // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we - // can't process it. - if (!BValNo->isDefByCopy()) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - - // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - // The live range might not exist after fun with physreg coalescing. - if (ALR == IntA.end()) return false; - VNInfo *AValNo = ALR->valno; - // If it's re-defined by an early clobber somewhere in the live range, then - // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. - // See PR3149: - // 172 %ECX<def> = MOV32rr %reg1039<kill> - // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, - // %EAX<kill>, - // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 - // 188 %EAX<def> = MOV32rr %EAX<kill> - // 196 %ECX<def> = MOV32rr %ECX<kill> - // 204 %ECX<def> = MOV32rr %ECX<kill> - // 212 %EAX<def> = MOV32rr %EAX<kill> - // 220 %EAX<def> = MOV32rr %EAX - // 228 %reg1039<def> = MOV32rr %ECX<kill> - // The early clobber operand ties ECX input to the ECX def. - // - // The live interval of ECX is represented as this: - // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) - // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->hasRedefByEC()) - return false; - - // If AValNo is defined as a copy from IntB, we can potentially process this. - // Get the instruction that defines this value number. 
- if (!CP.isCoalescable(AValNo->getCopy())) - return false; - - // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = - IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - if (ValLR == IntB.end()) - return false; - - // Make sure that the end of the live range is inside the same block as - // CopyMI. - MachineInstr *ValLREndInst = - li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) - return false; - - // Okay, we now know that ValLR ends in the same block that the CopyMI - // live-range starts. If there are no intervening live ranges between them in - // IntB, we can merge them. - if (ValLR+1 != BLR) return false; - - // If a live interval is a physical register, conservatively check if any - // of its aliases is overlapping the live interval of the virtual register. - // If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { - DEBUG({ - dbgs() << "\t\tInterfere with alias "; - li_->getInterval(*AS).print(dbgs(), tri_); - }); - return false; - } - } - - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), tri_); - }); - - SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; - // We are about to delete CopyMI, so need to remove it as the 'instruction - // that defines this value #'. Update the valnum with the new defining - // instruction #. - BValNo->def = FillerStart; - BValNo->setCopy(0); - - // Okay, we can merge them. We need to insert a new liverange: - // [ValLR.end, BLR.begin) of either value number, then we merge the - // two value numbers. - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); - - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, - li_->getVNInfoAllocator()))); - } - } - - // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); - IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), tri_); - dbgs() << "\n"; - }); - - // If the source instruction was killing the source register before the - // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); - if (UIdx != -1) { - ValLREndInst->getOperand(UIdx).setIsKill(false); - } - - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. - if (ALR->end == CopyIdx) - li_->shrinkToUses(&IntA); - - ++numExtends; - return true; -} - -/// HasOtherReachingDefs - Return true if there are definitions of IntB -/// other than BValNo val# that can reach uses of AValno val# of IntA. 
-bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, - LiveInterval &IntB, - VNInfo *AValNo, - VNInfo *BValNo) { - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - LiveInterval::Ranges::iterator BI = - std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); - if (BI != IntB.ranges.begin()) - --BI; - for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { - if (BI->valno == BValNo) - continue; - if (BI->start <= AI->start && BI->end > AI->start) - return true; - if (BI->start > AI->start && BI->start < AI->end) - return true; - } - } - return false; -} - -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with -/// IntA being the source and IntB being the dest, thus this defines a value -/// number in IntB. If the source value number (in IntA) is defined by a -/// commutable instruction and its other operand is coalesced to the copy dest -/// register, see if we can transform the copy into a noop by commuting the -/// definition. For example, -/// -/// A3 = op A2 B0<kill> -/// ... -/// B1 = A3 <- this copy -/// ... -/// = op A3 <- more uses -/// -/// ==> -/// -/// B2 = op B0 A2<kill> -/// ... -/// B1 = B2 <- now an identify copy -/// ... -/// = op B2 <- more uses -/// -/// This returns true if an interval was modified. -/// -bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // FIXME: For now, only eliminate the copy by commuting its def when the - // source register is a virtual register. We want to guard against cases - // where the copy is a back edge copy and commuting the def lengthen the - // live interval of the source register to the entire loop. - if (CP.isPhys() && CP.isFlipped()) - return false; - - // Bail if there is no dst interval. - if (!li_->hasInterval(CP.getDstReg())) - return false; - - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - - LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - - // BValNo is a value number in B that is defined by a copy from A. 'B3' in - // the example above. - VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || !BValNo->isDefByCopy()) - return false; - - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - - // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); - assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) - return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); - if (!DefMI) - return false; - const TargetInstrDesc &TID = DefMI->getDesc(); - if (!TID.isCommutable()) - return false; - // If DefMI is a two-address instruction then commuting it will change the - // destination register. 
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); - assert(DefIdx != -1); - unsigned UseOpIdx; - if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) - return false; - unsigned Op1, Op2, NewDstIdx; - if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) - return false; - if (Op1 == UseOpIdx) - NewDstIdx = Op2; - else if (Op2 == UseOpIdx) - NewDstIdx = Op1; - else - return false; - - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !NewDstMO.isKill()) - return false; - - // Make sure there are no other definitions of IntB that would reach the - // uses which the new definition can reach. - if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) - return false; - - // Abort if the aliases of IntB.reg have values that are not simply the - // clobbers from the superreg. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && - HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) - return false; - - // If some of the uses of IntA.reg is already coalesced away, return false. - // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_nodbg_iterator UI = - mri_->use_nodbg_begin(IntA.reg), - UE = mri_->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - SlotIndex UseIdx = li_->getInstructionIndex(UseMI); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end()) - continue; - if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) - return false; - } - - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' - << *DefMI); - - // At this point we have decided that it is legal to do this - // transformation. Start by commuting the instruction. - MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = tii_->commuteInstruction(DefMI); - if (!NewMI) - return false; - if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && - TargetRegisterInfo::isVirtualRegister(IntB.reg) && - !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) - return false; - if (NewMI != DefMI) { - li_->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); - MBB->erase(DefMI); - } - unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); - NewMI->getOperand(OpIdx).setIsKill(); - - // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. - // A = or A, B - // ... - // B = A - // ... - // C = A<kill> - // ... - // = B - - // Update uses of IntA of the specific Val# with IntB. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), - UE = mri_->use_end(); UI != UE;) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - ++UI; - if (JoinedCopies.count(UseMI)) - continue; - if (UseMI->isDebugValue()) { - // FIXME These don't have an instruction index. Not clear we have enough - // info to decide whether to do this replacement or not. For now do it. 
- UseMO.setReg(NewReg); - continue; - } - SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) - continue; - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - UseMO.substPhysReg(NewReg, *tri_); - else - UseMO.setReg(NewReg); - if (UseMI == CopyMI) - continue; - if (!UseMI->isCopy()) - continue; - if (UseMI->getOperand(0).getReg() != IntB.reg || - UseMI->getOperand(0).getSubReg()) - continue; - - // This copy will become a noop. If it's defining a new val#, merge it into - // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); - VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); - if (!DVNI) - continue; - DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); - assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - markAsJoined(UseMI); - } - - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition - // is updated. - VNInfo *ValNo = BValNo; - ValNo->def = AValNo->def; - ValNo->setCopy(0); - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); - } - DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - - IntA.removeValNo(AValNo); - DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); - ++numCommutes; - return true; -} - -/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial -/// computation, replace the copy by rematerialize the definition. -bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, - bool preserveSrcInt, - unsigned DstReg, - unsigned DstSubIdx, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) - return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); - if (!DefMI) - return false; - assert(DefMI && "Defining instruction disappeared"); - const TargetInstrDesc &TID = DefMI->getDesc(); - if (!TID.isAsCheapAsAMove()) - return false; - if (!tii_->isTriviallyReMaterializable(DefMI, AA)) - return false; - bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, AA, SawStore)) - return false; - if (TID.getNumDefs() != 1) - return false; - if (!DefMI->isImplicitDef()) { - // Make sure the copy destination register class fits the instruction - // definition register class. The mismatch can happen as a result of earlier - // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (mri_->getRegClass(DstReg) != RC) - return false; - } else if (!RC->contains(DstReg)) - return false; - } - - // If destination register has a sub-register index on it, make sure it - // matches the instruction register class. 
- if (DstSubIdx) { - const TargetInstrDesc &TID = DefMI->getDesc(); - if (TID.getNumDefs() != 1) - return false; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = - DstRC->getSubRegisterRegClass(DstSubIdx); - const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); - if (DefRC == DstRC) - DstSubIdx = 0; - else if (DefRC != DstSubRC) - return false; - } - - RemoveCopyFlag(DstReg, CopyMI); - - MachineBasicBlock *MBB = CopyMI->getParent(); - MachineBasicBlock::iterator MII = - llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); - MachineInstr *NewMI = prior(MII); - - // CopyMI may have implicit operands, transfer them over to the newly - // rematerialized instruction. And update implicit def interval valnos. - for (unsigned i = CopyMI->getDesc().getNumOperands(), - e = CopyMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - if (MO.isDef()) - RemoveCopyFlag(MO.getReg(), CopyMI); - } - - NewMI->copyImplicitOps(CopyMI); - li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); - CopyMI->eraseFromParent(); - ReMatCopies.insert(CopyMI); - ReMatDefs.insert(DefMI); - DEBUG(dbgs() << "Remat: " << *NewMI); - ++NumReMats; - - // The source interval can become smaller because we removed a use. - if (preserveSrcInt) - li_->shrinkToUses(&SrcInt); - - return true; -} - -/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and -/// update the subregister number if it is not zero. If DstReg is a -/// physical register and the existing subregister number of the def / use -/// being updated is not zero, make sure to set it to the correct physical -/// subregister. -void -SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { - bool DstIsPhys = CP.isPhys(); - unsigned SrcReg = CP.getSrcReg(); - unsigned DstReg = CP.getDstReg(); - unsigned SubIdx = CP.getSubIdx(); - - // Update LiveDebugVariables. - ldv_->renameRegister(SrcReg, DstReg, SubIdx); - - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); - MachineInstr *UseMI = I.skipInstruction();) { - // A PhysReg copy that won't be coalesced can perhaps be rematerialized - // instead. - if (DstIsPhys) { - if (UseMI->isCopy() && - !UseMI->getOperand(1).getSubReg() && - !UseMI->getOperand(0).getSubReg() && - UseMI->getOperand(1).getReg() == SrcReg && - UseMI->getOperand(0).getReg() != SrcReg && - UseMI->getOperand(0).getReg() != DstReg && - !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, - UseMI->getOperand(0).getReg(), 0, UseMI)) - continue; - } - - SmallVector<unsigned,8> Ops; - bool Reads, Writes; - tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); - bool Kills = false, Deads = false; - - // Replace SrcReg with DstReg in all UseMI operands. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(Ops[i]); - Kills |= MO.isKill(); - Deads |= MO.isDead(); - - if (DstIsPhys) - MO.substPhysReg(DstReg, *tri_); - else - MO.substVirtReg(DstReg, SubIdx, *tri_); - } - - // This instruction is a copy that will be removed. - if (JoinedCopies.count(UseMI)) - continue; - - if (SubIdx) { - // If UseMI was a simple SrcReg def, make sure we didn't turn it into a - // read-modify-write of DstReg. 
- if (Deads) - UseMI->addRegisterDead(DstReg, tri_); - else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, tri_); - - // Kill flags apply to the whole physical register. - if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, tri_); - } - - DEBUG({ - dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) - dbgs() << li_->getInstructionIndex(UseMI) << "\t"; - dbgs() << *UseMI; - }); - } -} - -/// removeIntervalIfEmpty - Check if the live interval of a physical register -/// is empty, if so remove it and also remove the empty intervals of its -/// sub-registers. Return true if live interval is removed. -static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, - const TargetRegisterInfo *tri_) { - if (li.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &sli = li_->getInterval(*SR); - if (sli.empty()) - li_->removeInterval(*SR); - } - li_->removeInterval(li.reg); - return true; - } - return false; -} - -/// RemoveDeadDef - If a def of a live interval is now determined dead, remove -/// the val# it defines. If the live interval becomes empty, remove it as well. -bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, - MachineInstr *DefMI) { - SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); - LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); - if (DefIdx != MLR->valno->def) - return false; - li.removeValNo(MLR->valno); - return removeIntervalIfEmpty(li, li_, tri_); -} - -void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, - const MachineInstr *CopyMI) { - SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - if (li_->hasInterval(DstReg)) { - LiveInterval &LI = li_->getInterval(DstReg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } - if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) - return; - for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { - if (!li_->hasInterval(*AS)) - continue; - LiveInterval &LI = li_->getInterval(*AS); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } -} - -/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. -/// We need to be careful about coalescing a source physical register with a -/// virtual register. Once the coalescing is done, it cannot be broken and these -/// are not spillable! If the destination interval uses are far away, think -/// twice about coalescing them! -bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { - bool Allocatable = li_->isAllocatable(CP.getDstReg()); - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - - /// Always join simple intervals that are defined by a single copy from a - /// reserved register. This doesn't increase register pressure, so it is - /// always beneficial. - if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) - return true; - - if (!EnablePhysicalJoin) { - DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); - return false; - } - - // Only coalesce to allocatable physreg, we don't want to risk modifying - // reserved registers. - if (!Allocatable) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // Don't join with physregs that have a ridiculous number of live - // ranges. 
The data structure performance is really bad when that
- // happens.
- if (li_->hasInterval(CP.getDstReg()) &&
- li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
- ++numAborts;
- DEBUG(dbgs()
- << "\tPhysical register live interval too complicated, abort!\n");
- return false;
- }
-
- // FIXME: Why are we skipping this test for partial copies?
- // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
- if (!CP.isPartial()) {
- const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
- unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
- if (Length > Threshold) {
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- return false;
- }
- }
- return true;
-}
-
-/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
-/// two virtual registers from different register classes.
-bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC) {
- unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
- // This heuristic is good enough in practice, but it's obviously not *right*.
- // 4 is a magic number that works well enough for x86, ARM, etc. It filters
- // out all but the most restrictive register classes.
- if (NewRCCount > 4 ||
- // Early exit if the function is fairly small, coalesce aggressively if
- // that's the case. For really special register classes with 3 or
- // fewer registers, be a bit more careful.
- (li_->getFuncInstructionCount() / NewRCCount) < 8)
- return true;
- LiveInterval &SrcInt = li_->getInterval(SrcReg);
- LiveInterval &DstInt = li_->getInterval(DstReg);
- unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
- unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
-
- // Coalesce aggressively if the intervals are small compared to the number of
- // registers in the new class. The number 4 is fairly arbitrary, chosen to be
- // less aggressive than the 8 used for the whole function size.
- const unsigned ThresSize = 4 * NewRCCount;
- if (SrcSize <= ThresSize && DstSize <= ThresSize)
- return true;
-
- // Estimate *register use density*. If it doubles or more, abort.
- unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
- mri_->use_nodbg_end());
- unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
- mri_->use_nodbg_end());
- unsigned NewUses = SrcUses + DstUses;
- unsigned NewSize = SrcSize + DstSize;
- if (SrcRC != NewRC && SrcSize > ThresSize) {
- unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
- if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
- return false;
- }
- if (DstRC != NewRC && DstSize > ThresSize) {
- unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
- if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
- return false;
- }
- return true;
-}
-
-
-/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
-/// which are the src/dst of the copy instruction CopyMI. This returns true
-/// if the copy was successfully coalesced away. If it is not currently
-/// possible to coalesce this interval, but it may be possible if other
-/// things get coalesced, then it returns true by reference in 'Again'.
-bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { - MachineInstr *CopyMI = TheCopy.MI; - - Again = false; - if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) - return false; // Already done. - - DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - - CoalescerPair CP(*tii_, *tri_); - if (!CP.setRegisters(CopyMI)) { - DEBUG(dbgs() << "\tNot coalescable.\n"); - return false; - } - - // If they are already joined we continue. - if (CP.getSrcReg() == CP.getDstReg()) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tCopy already coalesced.\n"); - return false; // Not coalescable. - } - - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) - << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) - << "\n"); - - // Enforce policies. - if (CP.isPhys()) { - if (!shouldJoinPhys(CP)) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) - return true; - return false; - } - } else { - // Avoid constraining virtual register regclass too much. - if (CP.isCrossClass()) { - DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); - if (DisableCrossClassJoin) { - DEBUG(dbgs() << "\tCross-class joins disabled.\n"); - return false; - } - if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), - mri_->getRegClass(CP.getSrcReg()), - mri_->getRegClass(CP.getDstReg()), - CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - - // When possible, let DstReg be the larger interval. - if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > - li_->getInterval(CP.getDstReg()).ranges.size()) - CP.flip(); - } - - // Okay, attempt to join these two intervals. On failure, this returns false. - // Otherwise, if one of the intervals being joined is a physreg, this method - // always canonicalizes DstInt to be it. The output "SrcInt" will not have - // been modified, so we can use this information below to update aliases. - if (!JoinIntervals(CP)) { - // Coalescing failed. - - // If definition of source is defined by trivial computation, try - // rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) - return true; - - // If we can eliminate the copy without merging the live ranges, do so now. - if (!CP.isPartial()) { - if (AdjustCopiesBackFrom(CP, CopyMI) || - RemoveCopyByCommutingDef(CP, CopyMI)) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tTrivial!\n"); - return true; - } - } - - // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "\tInterference!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - - // Coalescing to a virtual register that is of a sub-register class of the - // other. Make sure the resulting register is set to the right register class. - if (CP.isCrossClass()) { - ++numCrossRCs; - mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); - } - - // Remember to delete the copy instruction. - markAsJoined(CopyMI); - - UpdateRegDefsUses(CP); - - // If we have extended the live range of a physical register, make sure we - // update live-in lists as well. 
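JoinCopy above is easiest to read as a decision cascade; the `if (CP.isPhys())` block that follows merely finishes the bookkeeping for a successful join. Here is a schematic of that cascade; every field of `CopyQuery` is a hypothetical stand-in for the corresponding predicate in the pass, so this is an illustration of the control flow, not the implementation.

// Schematic of the JoinCopy decision cascade above (illustrative only).
enum JoinOutcome { NotCoalescable, Joined, TryAgainLater };

struct CopyQuery {
  bool AlreadyJoined, RegistersOK, SamePair, PhysPolicyOK;
  bool CrossClass, CrossClassWin, IntervalsJoinable, TrivialFixup;
};

static JoinOutcome decideJoin(const CopyQuery &Q) {
  if (Q.AlreadyJoined || !Q.RegistersOK || Q.SamePair)
    return NotCoalescable;
  if (!Q.PhysPolicyOK)                 // shouldJoinPhys said no
    return NotCoalescable;             // (after trying rematerialization)
  if (Q.CrossClass && !Q.CrossClassWin)
    return TryAgainLater;              // may succeed once classes loosen up
  if (Q.IntervalsJoinable)
    return Joined;
  if (Q.TrivialFixup)                  // AdjustCopiesBackFrom / commuting
    return Joined;
  return TryAgainLater;                // interference now, maybe not later
}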
- if (CP.isPhys()) {
- SmallVector<MachineBasicBlock*, 16> BlockSeq;
- // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
- // ranges for this, and they are preserved.
- LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
- for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
- I != E; ++I ) {
- li_->findLiveInMBBs(I->start, I->end, BlockSeq);
- for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
- MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(CP.getDstReg()))
- block.addLiveIn(CP.getDstReg());
- }
- BlockSeq.clear();
- }
- }
-
- // SrcReg is guaranteed to be the register whose live interval is
- // being merged.
- li_->removeInterval(CP.getSrcReg());
-
- // Update regalloc hint.
- tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
-
- DEBUG({
- LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
- dbgs() << "\tJoined. Result = ";
- DstInt.print(dbgs(), tri_);
- dbgs() << "\n";
- });
-
- ++numJoins;
- return true;
-}
-
-/// ComputeUltimateVN - Assuming we are going to join two live intervals,
-/// compute what the resultant value numbers for each value in the two input
-/// ranges will be. This is complicated by copies between the two which can
-/// and will commonly cause multiple value numbers to be merged into one.
-///
-/// VN is the value number that we're trying to resolve. InstDefiningValue
-/// keeps track of the new InstDefiningValue assignment for the result
-/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
-/// whether a value in this or other is a copy from the opposite set.
-/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
-/// already been assigned.
-///
-/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
-/// contains the value number the copy is from.
-///
-static unsigned ComputeUltimateVN(VNInfo *VNI,
- SmallVector<VNInfo*, 16> &NewVNInfo,
- DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
- DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
- SmallVector<int, 16> &ThisValNoAssignments,
- SmallVector<int, 16> &OtherValNoAssignments) {
- unsigned VN = VNI->id;
-
- // If the VN has already been computed, just return it.
- if (ThisValNoAssignments[VN] >= 0)
- return ThisValNoAssignments[VN];
- assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
-
- // If this val is not a copy from the other val, then it must be a new value
- // number in the destination.
- DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
- if (I == ThisFromOther.end()) {
- NewVNInfo.push_back(VNI);
- return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
- }
- VNInfo *OtherValNo = I->second;
-
- // Otherwise, this *is* a copy from the RHS. If the other side has already
- // been computed, return it.
- if (OtherValNoAssignments[OtherValNo->id] >= 0)
- return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
- // Mark this value number as currently being computed, then ask what the
- // ultimate value # of the other value is.
- ThisValNoAssignments[VN] = -2;
- unsigned UltimateVN =
- ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
- OtherValNoAssignments, ThisValNoAssignments);
- return ThisValNoAssignments[VN] = UltimateVN;
-}
-
-/// JoinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
-bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
- LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
- DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
-
- // If a live interval is a physical register, check for interference with any
- // aliases. The interference check implemented here is a bit more conservative
- // than the full interference check below. We allow overlapping live ranges
- // only when one is a copy of the other.
- if (CP.isPhys()) {
- for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
- if (!li_->hasInterval(*AS))
- continue;
- const LiveInterval &LHS = li_->getInterval(*AS);
- LiveInterval::const_iterator LI = LHS.begin();
- for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
- RI != RE; ++RI) {
- LI = std::lower_bound(LI, LHS.end(), RI->start);
- // Does LHS have an overlapping live range starting before RI?
- if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
- (RI->start != RI->valno->def ||
- !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), tri_);
- dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
- });
- return false;
- }
-
- // Check that LHS ranges beginning in this range are copies.
- for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
- if (LI->start != LI->valno->def ||
- !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), tri_);
- dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
- });
- return false;
- }
- }
- }
- }
- }
-
- // Compute the final value assignment, assuming that the live ranges can be
- // coalesced.
- SmallVector<int, 16> LHSValNoAssignments;
- SmallVector<int, 16> RHSValNoAssignments;
- DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
- DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
- SmallVector<VNInfo*, 16> NewVNInfo;
-
- LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
- DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
-
- // Loop over the value numbers of the LHS, seeing if any are defined from
- // the RHS.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
- continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
-
- // DstReg is known to be a register in the LHS interval. If the src is
- // from the RHS interval, we can use its value #.
- if (!CP.isCoalescable(VNI->getCopy()))
- continue;
-
- // Figure out the value # from the RHS.
- LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- // The copy could be to an aliased physreg.
- if (!lr) continue;
- LHSValsDefinedFromRHS[VNI] = lr->valno;
- }
-
- // Loop over the value numbers of the RHS, seeing if any are defined from
- // the LHS.
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
- continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
-
- // DstReg is known to be a register in the RHS interval. If the src is
- // from the LHS interval, we can use its value #.
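The alias check above tolerates overlap between a physreg interval and the incoming interval only where the overlapping range is defined by the very copy being coalesced; the loop resumes below with the mirror-image test on the LHS side. As a loose standalone model of that rule (the `ToyRange` type and `aliasInterferes` helper are hypothetical, and much simpler than the real valno-based test):

// Walk two sorted range lists; reject any overlap not explained by the copy.
#include <cstddef>
#include <vector>

struct ToyRange {
  int Start, End;            // half-open [Start, End)
  bool DefIsCoalescedCopy;   // range starts at a def that is the copy itself
};

static bool aliasInterferes(const std::vector<ToyRange> &LHS,
                            const std::vector<ToyRange> &RHS) {
  size_t i = 0, j = 0;
  while (i != LHS.size() && j != RHS.size()) {
    const ToyRange &A = LHS[i], &B = RHS[j];
    if (A.End <= B.Start) { ++i; continue; }   // disjoint, A entirely first
    if (B.End <= A.Start) { ++j; continue; }   // disjoint, B entirely first
    // Overlap: only legal if the later-starting range is the copy's def.
    const ToyRange &Later = (A.Start >= B.Start) ? A : B;
    if (!Later.DefIsCoalescedCopy)
      return true;                             // real interference
    if (A.End < B.End) ++i; else ++j;          // advance past the overlap
  }
  return false;
}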
- if (!CP.isCoalescable(VNI->getCopy())) - continue; - - // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - // The copy could be to an aliased physreg. - if (!lr) continue; - RHSValsDefinedFromLHS[VNI] = lr->valno; - } - - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); - } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; - continue; - } - - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } - - // Armed with the mappings of LHS/RHS values to ultimate values, walk the - // interval lists to see if these intervals are coalescable. - LiveInterval::const_iterator I = LHS.begin(); - LiveInterval::const_iterator IE = LHS.end(); - LiveInterval::const_iterator J = RHS.begin(); - LiveInterval::const_iterator JE = RHS.end(); - - // Skip ahead until the first place of potential sharing. - if (I != IE && J != JE) { - if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; - } - } - - while (I != IE && J != JE) { - // Determine if these two live ranges overlap. - bool Overlaps; - if (I->start < J->start) { - Overlaps = I->end > J->start; - } else { - Overlaps = J->end > I->start; - } - - // If so, check value # info to determine if they are really different. - if (Overlaps) { - // If the live range overlap will map to the same value number in the - // result liverange, we can still coalesce them. If not, we can't. - if (LHSValNoAssignments[I->valno->id] != - RHSValNoAssignments[J->valno->id]) - return false; - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) - return false; - } - - if (I->end < J->end) - ++I; - else - ++J; - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. 
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
- E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned RHSValID = RHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[RHSValID]->setHasPHIKill(true);
- }
-
- if (LHSValNoAssignments.empty())
- LHSValNoAssignments.push_back(-1);
- if (RHSValNoAssignments.empty())
- RHSValNoAssignments.push_back(-1);
-
- // If we get here, we know that we can coalesce the live ranges. Ask the
- // intervals to coalesce themselves now.
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- mri_);
- return true;
-}
-
-namespace {
- // DepthMBBCompare - Comparison predicate that sorts first based on the loop
- // depth of the basic block (the unsigned), and then on the MBB number.
- struct DepthMBBCompare {
- typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
- bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
- // Deeper loops first
- if (LHS.first != RHS.first)
- return LHS.first > RHS.first;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
- unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
- if (cl != cr)
- return cl > cr;
-
- // As a last resort, sort by block number.
- return LHS.second->getNumber() < RHS.second->getNumber();
- }
- };
-}
-
-void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<CopyRec> &TryAgain) {
- DEBUG(dbgs() << MBB->getName() << ":\n");
-
- SmallVector<CopyRec, 8> VirtCopies;
- SmallVector<CopyRec, 8> PhysCopies;
- SmallVector<CopyRec, 8> ImpDefCopies;
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E;) {
- MachineInstr *Inst = MII++;
-
- // If this isn't a copy or an extract_subreg, we can't join intervals.
- unsigned SrcReg, DstReg;
- if (Inst->isCopy()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(1).getReg();
- } else if (Inst->isSubregToReg()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(2).getReg();
- } else
- continue;
-
- bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
- ImpDefCopies.push_back(CopyRec(Inst, 0));
- else if (SrcIsPhys || DstIsPhys)
- PhysCopies.push_back(CopyRec(Inst, 0));
- else
- VirtCopies.push_back(CopyRec(Inst, 0));
- }
-
- // Try coalescing implicit copies and insert_subreg <undef> first,
- // followed by copies to / from physical registers, then finally copies
- // from virtual registers to virtual registers.
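(The three loops that follow drain ImpDefCopies, PhysCopies, and VirtCopies in exactly that order.) The block-visitation order itself comes from the DepthMBBCompare predicate above; stripped of the pass's types, that ordering looks like the sketch below, where `BlockKey` is a hypothetical stand-in for the (loop depth, MBB) pairs the pass actually sorts.

// Standalone illustration of the DepthMBBCompare ordering above.
struct BlockKey {
  unsigned LoopDepth;   // stands in for loopInfo->getLoopDepth(MBB)
  unsigned CFGDegree;   // stands in for pred_size() + succ_size()
  int Number;           // stands in for MBB->getNumber()
};

static bool visitBefore(const BlockKey &L, const BlockKey &R) {
  if (L.LoopDepth != R.LoopDepth)
    return L.LoopDepth > R.LoopDepth;  // deeper loops first
  if (L.CFGDegree != R.CFGDegree)
    return L.CFGDegree > R.CFGDegree;  // better-connected blocks first
  return L.Number < R.Number;          // deterministic tie-break
}

// Usage would be e.g. std::sort(Keys.begin(), Keys.end(), visitBefore);
// sorting blocks this way tackles the hardest copies while intervals are
// still short.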
- for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { - CopyRec &TheCopy = ImpDefCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { - CopyRec &TheCopy = PhysCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { - CopyRec &TheCopy = VirtCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } -} - -void SimpleRegisterCoalescing::joinIntervals() { - DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); - - std::vector<CopyRec> TryAgainList; - if (loopInfo->empty()) { - // If there are no loops in the function, join intervals in function order. - for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); - I != E; ++I) - CopyCoalesceInMBB(I, TryAgainList); - } else { - // Otherwise, join intervals in inner loops before other intervals. - // Unfortunately we can't just iterate over loop hierarchy here because - // there may be more MBB's than BB's. Collect MBB's for sorting. - - // Join intervals in the function prolog first. We want to join physical - // registers with virtual registers before the intervals got too long. - std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; - for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ - MachineBasicBlock *MBB = I; - MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); - } - - // Sort by loop depth. - std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); - - // Finally, join intervals in loop nest order. - for (unsigned i = 0, e = MBBs.size(); i != e; ++i) - CopyCoalesceInMBB(MBBs[i].second, TryAgainList); - } - - // Joining intervals can allow other intervals to be joined. Iteratively join - // until we make no progress. - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - CopyRec &TheCopy = TryAgainList[i]; - if (!TheCopy.MI) - continue; - - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy.MI = 0; // Mark this one as done. - ProgressMade = true; - } - } - } -} - -void SimpleRegisterCoalescing::releaseMemory() { - JoinedCopies.clear(); - ReMatCopies.clear(); - ReMatDefs.clear(); -} - -bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - li_ = &getAnalysis<LiveIntervals>(); - ldv_ = &getAnalysis<LiveDebugVariables>(); - AA = &getAnalysis<AliasAnalysis>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); - - DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'); - - if (VerifyCoalescing) - mf_->verify(this, "Before register coalescing"); - - RegClassInfo.runOnMachineFunction(fn); - - // Join (coalesce) intervals if requested. - if (EnableJoining) { - joinIntervals(); - DEBUG({ - dbgs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); - I != E; ++I){ - I->second->print(dbgs(), tri_); - dbgs() << "\n"; - } - }); - } - - // Perform a final pass over the instructions and compute spill weights - // and remove identity moves. 
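The retry loop in joinIntervals above is a plain fixpoint iteration: deferred copies are re-attempted until a full sweep makes no progress, and it terminates because items only ever move from pending to done. Its generic shape is sketched below, with `WorkItem` and `tryJoin` as hypothetical stand-ins for CopyRec and JoinCopy (the final cleanup pass the last comment refers to appears next).

// Generic shape of the "try again until no progress" loop above.
#include <vector>

struct WorkItem { void *MI; };                 // null once handled

static void joinUntilFixpoint(std::vector<WorkItem> &TryAgain,
                              bool (*tryJoin)(WorkItem&, bool &Again)) {
  bool Progress = true;
  while (Progress) {
    Progress = false;
    for (unsigned i = 0, e = TryAgain.size(); i != e; ++i) {
      WorkItem &W = TryAgain[i];
      if (!W.MI)
        continue;                              // already done
      bool Again = false;
      if (tryJoin(W, Again) || !Again) {
        W.MI = 0;                              // joined, or hopeless: done
        Progress = true;
      }
    }
  }
}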
- SmallVector<unsigned, 4> DeadDefs; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* mbb = mbbi; - for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); - mii != mie; ) { - MachineInstr *MI = mii; - if (JoinedCopies.count(MI)) { - // Delete all coalesced copies. - bool DoDelete = true; - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - MI->getNumOperands() > 2) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - - if (MI->allDefsAreDead()) { - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - li_->hasInterval(SrcReg)) - li_->shrinkToUses(&li_->getInterval(SrcReg)); - DoDelete = true; - } - if (!DoDelete) { - // We need the instruction to adjust liveness, so make it a KILL. - if (MI->isSubregToReg()) { - MI->RemoveOperand(3); - MI->RemoveOperand(1); - } - MI->setDesc(tii_->get(TargetOpcode::KILL)); - mii = llvm::next(mii); - } else { - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - } - continue; - } - - // Now check if this is a remat'ed def instruction which is now dead. - if (ReMatDefs.count(MI)) { - bool isDead = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - DeadDefs.push_back(Reg); - if (MO.isDead()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !mri_->use_nodbg_empty(Reg)) { - isDead = false; - break; - } - } - if (isDead) { - while (!DeadDefs.empty()) { - unsigned DeadDef = DeadDefs.back(); - DeadDefs.pop_back(); - RemoveDeadDef(li_->getInterval(DeadDef), MI); - } - li_->RemoveMachineInstrFromMaps(mii); - mii = mbbi->erase(mii); - continue; - } else - DeadDefs.clear(); - } - - ++mii; - - // Check for now unnecessary kill flags. - if (li_->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - unsigned reg = MO.getReg(); - if (!reg || !li_->hasInterval(reg)) continue; - if (!li_->getInterval(reg).killedAt(DefIdx)) { - MO.setIsKill(false); - continue; - } - // When leaving a kill flag on a physreg, check if any subregs should - // remain alive. - if (!TargetRegisterInfo::isPhysicalRegister(reg)) - continue; - for (const unsigned *SR = tri_->getSubRegisters(reg); - unsigned S = *SR; ++SR) - if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) - MI->addRegisterDefined(S, tri_); - } - } - } - - DEBUG(dump()); - DEBUG(ldv_->dump()); - if (VerifyCoalescing) - mf_->verify(this, "After register coalescing"); - return true; -} - -/// print - Implement the dump method. -void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const { - li_->print(O, m); -} - -RegisterCoalescer* llvm::createSimpleRegisterCoalescer() { - return new SimpleRegisterCoalescing(); -} - -// Make sure that anything that uses RegisterCoalescer pulls in this file... 
-DEFINING_FILE_FOR(SimpleRegisterCoalescing) diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 92970e4..65a33da 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -87,12 +87,10 @@ FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { bool SjLjEHPass::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf - const Type *VoidPtrTy = - Type::getInt8PtrTy(M.getContext()); - const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); FunctionContextTy = - StructType::get(M.getContext(), - VoidPtrTy, // __prev + StructType::get(VoidPtrTy, // __prev Int32Ty, // call_site ArrayType::get(Int32Ty, 4), // __data VoidPtrTy, // __personality diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index bf27cc8..761cab7 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -76,12 +76,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { return LSP.first; // There may not be a call instruction (?) in which case we ignore LPad. LSP.second = LSP.first; - for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin(); - I != E; --I) + for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); + I != E;) { + --I; if (I->getDesc().isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } + } } // If CurLI is live into a landing pad successor, move the last split point @@ -122,7 +124,7 @@ void SplitAnalysis::analyzeUses() { // Compute per-live block info. if (!calcLiveBlockInfo()) { // FIXME: calcLiveBlockInfo found inconsistencies in the live range. - // I am looking at you, SimpleRegisterCoalescing! + // I am looking at you, RegisterCoalescer! DidRepairRange = true; ++NumRepairs; DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); @@ -165,7 +167,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); // If the block contains no uses, the range must be live through. At one - // point, SimpleRegisterCoalescing could create dangling ranges that ended + // point, RegisterCoalescer could create dangling ranges that ended // mid-block. 
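The computeLastSplitPoint hunk above replaces a fragile reverse loop, which never examined the block's first instruction, with the standard decrement-inside-the-body idiom; the `if (UseI == UseE ...)` check that follows belongs to the separate calcLiveBlockInfo logic. A minimal illustration of the corrected idiom, with std::list standing in for a basic block's instruction list:

// Reverse scan over a bidirectional container without skipping begin().
#include <list>

static const int *findLastMatch(const std::list<int> &L, int Key) {
  for (std::list<int>::const_iterator I = L.end(), E = L.begin(); I != E;) {
    --I;                    // step first, so *I is valid on every test
    if (*I == Key)
      return &*I;           // last occurrence, scanning backwards
  }
  return 0;                 // not found; also handles the empty list
}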
if (UseI == UseE || *UseI >= Stop) { ++NumThroughBlocks; @@ -634,6 +636,7 @@ unsigned SplitEditor::openIntv() { void SplitEditor::selectIntv(unsigned Idx) { assert(Idx != 0 && "Cannot select the complement interval"); assert(Idx < Edit->size() && "Can only select previously opened interval"); + DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); OpenIdx = Idx; } @@ -654,6 +657,24 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { return VNI->def; } +SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvAfter"); + DEBUG(dbgs() << " enterIntvAfter " << Idx); + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvAfter called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); SlotIndex End = LIS.getMBBEndIdx(&MBB); @@ -1005,12 +1026,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { markComplexMapped(i, ParentVNI); } -#ifndef NDEBUG - // Every new interval must have a def by now, otherwise the split is bogus. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) - assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); -#endif - // Transfer the simply mapped values, check if any are skipped. bool Skipped = transferValues(); if (Skipped) @@ -1109,3 +1124,263 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { } finish(); } + + +//===----------------------------------------------------------------------===// +// Global Live Range Splitting Support +//===----------------------------------------------------------------------===// + +// These methods support a method of global live range splitting that uses a +// global algorithm to decide intervals for CFG edges. They will insert split +// points and color intervals in basic blocks while avoiding interference. +// +// Note that splitSingleBlock is also useful for blocks where both CFG edges +// are on the stack. + +void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter){ + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + + DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop + << ") intf " << LeaveBefore << '-' << EnterAfter + << ", live-through " << IntvIn << " -> " << IntvOut); + + assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); + + if (!IntvOut) { + DEBUG(dbgs() << ", spill on entry.\n"); + // + // <<<<<<<<< Possible LeaveBefore interference. + // |-----------| Live through. + // -____________ Spill on entry. + // + selectIntv(IntvIn); + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + SlotIndex Idx = leaveIntvAtTop(*MBB); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + (void)Idx; + return; + } + + if (!IntvIn) { + DEBUG(dbgs() << ", reload on exit.\n"); + // + // >>>>>>> Possible EnterAfter interference. + // |-----------| Live through. + // ___________-- Reload on exit. 
+ // + selectIntv(IntvOut); + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + SlotIndex Idx = enterIntvAtEnd(*MBB); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + (void)Idx; + return; + } + + if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { + DEBUG(dbgs() << ", straight through.\n"); + // + // |-----------| Live through. + // ------------- Straight through, same intv, no interference. + // + selectIntv(IntvOut); + useIntv(Start, Stop); + return; + } + + // We cannot legally insert splits after LSP. + SlotIndex LSP = SA.getLastSplitPoint(MBBNum); + + if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || + LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { + DEBUG(dbgs() << ", switch avoiding interference.\n"); + // + // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ------======= Switch intervals between interference. + // + SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP; + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(Cut); + useIntv(Idx, Stop); + selectIntv(IntvIn); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + DEBUG(dbgs() << ", create local intv for interference.\n"); + // + // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ==---------== Switch intervals before/after interference. + // + assert(LeaveBefore <= EnterAfter && "Missed case"); + + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + selectIntv(IntvIn); + Idx = leaveIntvBefore(LeaveBefore); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); +} + + +void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstUse << '-' << BI.LastUse + << ", reg-in " << IntvIn << ", leave before " << LeaveBefore + << (BI.LiveOut ? ", stack-out" : ", killed in block")); + + assert(IntvIn && "Must have register in"); + assert(BI.LiveIn && "Must be live-in"); + assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); + + if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) { + DEBUG(dbgs() << " before interference.\n"); + // + // <<< Interference after kill. + // |---o---x | Killed in block. + // ========= Use IntvIn everywhere. + // + selectIntv(IntvIn); + useIntv(Start, BI.LastUse); + return; + } + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) { + // + // <<< Possible interference after last use. + // |---o---o---| Live-out on stack. + // =========____ Leave IntvIn after last use. + // + // < Interference after last use. + // |---o---o--o| Live-out on stack, late last use. + // ============ Copy to stack after LSP, overlap IntvIn. + // \_____ Stack interval is live-out. 
+ // + if (BI.LastUse < LSP) { + DEBUG(dbgs() << ", spill after last use before interference.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAfter(BI.LastUse); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } else { + DEBUG(dbgs() << ", spill before last split point.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvBefore(LSP); + overlapIntv(Idx, BI.LastUse); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } + return; + } + + // The interference is overlapping somewhere we wanted to use IntvIn. That + // means we need to create a local interval that can be allocated a + // different register. + unsigned LocalIntv = openIntv(); + (void)LocalIntv; + DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); + + if (!BI.LiveOut || BI.LastUse < LSP) { + // + // <<<<<<< Interference overlapping uses. + // |---o---o---| Live-out on stack. + // =====----____ Leave IntvIn before interference, then spill. + // + SlotIndex To = leaveIntvAfter(BI.LastUse); + SlotIndex From = enterIntvBefore(LeaveBefore); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); + return; + } + + // <<<<<<< Interference overlapping uses. + // |---o---o--o| Live-out on stack, late last use. + // =====------- Copy to stack before LSP, overlap LocalIntv. + // \_____ Stack interval is live-out. + // + SlotIndex To = leaveIntvBefore(LSP); + overlapIntv(To, BI.LastUse); + SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore)); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); +} + +void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstUse << '-' << BI.LastUse + << ", reg-out " << IntvOut << ", enter after " << EnterAfter + << (BI.LiveIn ? ", stack-in" : ", defined in block")); + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + assert(IntvOut && "Must have register out"); + assert(BI.LiveOut && "Must be live-out"); + assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); + + if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) { + DEBUG(dbgs() << " after interference.\n"); + // + // >>>> Interference before def. + // | o---o---| Defined in block. + // ========= Use IntvOut everywhere. + // + selectIntv(IntvOut); + useIntv(BI.FirstUse, Stop); + return; + } + + if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) { + DEBUG(dbgs() << ", reload after interference.\n"); + // + // >>>> Interference before def. + // |---o---o---| Live-through, stack-in. + // ____========= Enter IntvOut before first use. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse)); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + // The interference is overlapping somewhere we wanted to use IntvOut. That + // means we need to create a local interval that can be allocated a + // different register. + DEBUG(dbgs() << ", interference overlaps uses.\n"); + // + // >>>>>>> Interference overlapping uses. + // |---o---o---| Live-through, stack-in. + // ____---====== Create local interval for interference range. 
+ // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + openIntv(); + SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse)); + useIntv(From, Idx); +} diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 7174c0b..7948b72 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -81,6 +81,12 @@ public: bool LiveThrough; ///< Live in whole block (Templ 5. above). bool LiveIn; ///< Current reg is live in. bool LiveOut; ///< Current reg is live out. + + /// isOneInstr - Returns true when this BlockInfo describes a single + /// instruction. + bool isOneInstr() const { + return SlotIndex::isSameInstr(FirstUse, LastUse); + } }; private: @@ -360,6 +366,10 @@ public: /// Return the beginning of the new live range. SlotIndex enterIntvBefore(SlotIndex Idx); + /// enterIntvAfter - Enter the open interval after the instruction at Idx. + /// Return the beginning of the new live range. + SlotIndex enterIntvAfter(SlotIndex Idx); + /// enterIntvAtEnd - Enter the open interval at the end of MBB. /// Use the open interval from he inserted copy to the MBB end. /// Return the beginning of the new live range. @@ -416,6 +426,42 @@ public: /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + + /// splitLiveThroughBlock - Split CurLI in the given block such that it + /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in + /// the block, but they will be ignored when placing split points. + /// + /// @param MBBNum Block number. + /// @param IntvIn Interval index entering the block. + /// @param LeaveBefore When set, leave IntvIn before this point. + /// @param IntvOut Interval index leaving the block. + /// @param EnterAfter When set, enter IntvOut after this point. + void splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter); + + /// splitRegInBlock - Split CurLI in the given block such that it enters the + /// block in IntvIn and leaves it on the stack (or not at all). Split points + /// are placed in a way that avoids putting uses in the stack interval. This + /// may require creating a local interval when there is interference. + /// + /// @param BI Block descriptor. + /// @param IntvIn Interval index entering the block. Not 0. + /// @param LeaveBefore When set, leave IntvIn before this point. + void splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore); + + /// splitRegOutBlock - Split CurLI in the given block such that it enters the + /// block on the stack (or isn't live-in at all) and leaves it in IntvOut. + /// Split points are placed to avoid interference and such that the uses are + /// not in the stack interval. This may require creating a local interval + /// when there is interference. + /// + /// @param BI Block descriptor. + /// @param IntvOut Interval index leaving the block. + /// @param EnterAfter When set, enter IntvOut after this point. 
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter); }; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp index 08aee82..ec75df4 100644 --- a/lib/CodeGen/Splitter.cpp +++ b/lib/CodeGen/Splitter.cpp @@ -11,7 +11,7 @@ #include "Splitter.h" -#include "SimpleRegisterCoalescing.h" +#include "RegisterCoalescer.h" #include "llvm/Module.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index f0a44ab..d3cbd15 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -186,7 +186,7 @@ bool StackProtector::InsertStackProtectors() { Value *Args[] = { LI, AI }; CallInst:: Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - &Args[0], array_endof(Args), "", InsPt); + Args, "", InsPt); // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 01f5b56..57cbe1ba 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -504,7 +504,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, bool FoundDef = false; // Not counting 2address def. Uses.clear(); - const TargetInstrDesc &TID = MII->getDesc(); + const MCInstrDesc &MCID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { MachineOperand &MO = MII->getOperand(i); if (!MO.isReg()) @@ -521,7 +521,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, if (MO.getSubReg() || MII->isSubregToReg()) return false; - const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); if (RC && !RC->contains(NewReg)) return false; @@ -566,7 +566,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, SmallVector<MachineOperand*, 4> Uses; while (++MII != MBB->end()) { bool FoundKill = false; - const TargetInstrDesc &TID = MII->getDesc(); + const MCInstrDesc &MCID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { MachineOperand &MO = MII->getOperand(i); if (!MO.isReg()) @@ -583,7 +583,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, if (MO.getSubReg()) return false; - const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); if (RC && !RC->contains(NewReg)) return false; if (MO.isKill()) diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index e8eab8f..6b801cb 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -95,10 +95,22 @@ namespace { SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool shouldTailDuplicate(const MachineFunction &MF, - MachineBasicBlock &TailBB); - bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + bool IsSimple, MachineBasicBlock &TailBB); + bool isSimpleBB(MachineBasicBlock *TailBB); + bool canCompletelyDuplicateBB(MachineBasicBlock &BB); + bool duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + const DenseSet<unsigned> &RegsUsedByPhi, + SmallVector<MachineInstr*, 16> &Copies); + bool TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, 
SmallVector<MachineInstr*, 16> &Copies); + bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF); + void RemoveDeadBlock(MachineBasicBlock *MBB); }; @@ -169,6 +181,109 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } +/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. +bool +TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF) { + // Save the successors list. + SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock*, 8> TDBBs; + SmallVector<MachineInstr*, 16> Copies; + if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) + return false; + + ++NumTails; + + SmallVector<MachineInstr*, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = 0; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI->isDebugValue()) { + // SSAUpdate can replace the use with an undef. That creates + // a debug instruction that is a kill. + // FIXME: Should it SSAUpdate job to delete debug instructions + // instead of replacing the use with undef? + UseMI->eraseFromParent(); + continue; + } + if (UseMI->getParent() == DefBB && !UseMI->isPHI()) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted by tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + + if (NewPHIs.size()) + NumAddedPHIs += NewPHIs.size(); + + return true; +} + /// TailDuplicateBlocks - Look for small blocks that are unconditionally /// branched to and do not fall through. 
Tail-duplicate their instructions /// into their predecessors to eliminate (dynamic) branches. @@ -180,100 +295,22 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { VerifyPHIs(MF, true); } - SmallVector<MachineInstr*, 8> NewPHIs; - MachineSSAUpdater SSAUpdate(MF, &NewPHIs); - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; if (NumTails == TailDupLimit) break; - // Save the successors list. - SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), - MBB->succ_end()); - - SmallVector<MachineBasicBlock*, 8> TDBBs; - SmallVector<MachineInstr*, 16> Copies; - if (TailDuplicate(MBB, MF, TDBBs, Copies)) { - ++NumTails; - - // TailBB's immediate successors are now successors of those predecessors - // which duplicated TailBB. Add the predecessors as sources to the PHI - // instructions. - bool isDead = MBB->pred_empty(); - if (PreRegAlloc) - UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); - - // If it is dead, remove it. - if (isDead) { - NumInstrDups -= MBB->size(); - RemoveDeadBlock(MBB); - ++NumDeadBlocks; - } - - // Update SSA form. - if (!SSAUpdateVRs.empty()) { - for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { - unsigned VReg = SSAUpdateVRs[i]; - SSAUpdate.Initialize(VReg); - - // If the original definition is still around, add it as an available - // value. - MachineInstr *DefMI = MRI->getVRegDef(VReg); - MachineBasicBlock *DefBB = 0; - if (DefMI) { - DefBB = DefMI->getParent(); - SSAUpdate.AddAvailableValue(DefBB, VReg); - } - - // Add the new vregs as available values. - DenseMap<unsigned, AvailableValsTy>::iterator LI = - SSAUpdateVals.find(VReg); - for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = LI->second[j].first; - unsigned SrcReg = LI->second[j].second; - SSAUpdate.AddAvailableValue(SrcBB, SrcReg); - } - - // Rewrite uses that are outside of the original def's block. - MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); - while (UI != MRI->use_end()) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - ++UI; - if (UseMI->getParent() == DefBB && !UseMI->isPHI()) - continue; - SSAUpdate.RewriteUse(UseMO); - } - } + bool IsSimple = isSimpleBB(MBB); - SSAUpdateVRs.clear(); - SSAUpdateVals.clear(); - } - - // Eliminate some of the copies inserted by tail duplication to maintain - // SSA form. - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - MachineInstr *Copy = Copies[i]; - if (!Copy->isCopy()) - continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { - // Copy is the only use. Do trivial copy propagation here. 
- MRI->replaceRegWith(Dst, Src); - Copy->eraseFromParent(); - } - } + if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + continue; - if (PreRegAlloc && TailDupVerify) - VerifyPHIs(MF, false); - MadeChange = true; - } + MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); } - NumAddedPHIs += NewPHIs.size(); + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); return MadeChange; } @@ -283,6 +320,8 @@ static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; + if (UseMI->isDebugValue()) + continue; if (UseMI->getParent() != BB) return true; } @@ -485,11 +524,16 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, /// shouldTailDuplicate - Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, MachineBasicBlock &TailBB) { // Only duplicate blocks that end with unconditional branches. if (TailBB.canFallThrough()) return false; + // Don't try to tail-duplicate single-block loops. + if (TailBB.isSuccessor(&TailBB)) + return false; + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. @@ -500,73 +544,208 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, else MaxDuplicateCount = TailDuplicateSize; - if (PreRegAlloc) { - if (TailBB.empty()) - return false; - const TargetInstrDesc &TID = TailBB.back().getDesc(); - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches. - if (!TID.isIndirectBranch()) - return false; - // If the target has hardware branch prediction that can handle indirect - // branches, duplicating them can often make them predictable when there - // are common paths through the code. The limit needs to be high enough - // to allow undoing the effects of tail merging and other optimizations - // that rearrange the predecessors of the indirect branch. - MaxDuplicateCount = 20; - } + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. - // Don't try to tail-duplicate single-block loops. - if (TailBB.isSuccessor(&TailBB)) - return false; + bool HasIndirectbr = false; + if (!TailBB.empty()) + HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + + if (HasIndirectbr && PreRegAlloc) + MaxDuplicateCount = 20; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - bool HasCall = false; for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) return false; + if (I->getDesc().isNotDuplicable()) + return false; + // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) return false; - // Don't duplicate more than the threshold. 
- if (InstrCount == MaxDuplicateCount) return false; - // Remember if we saw a call. - if (I->getDesc().isCall()) HasCall = true; + if (PreRegAlloc && I->getDesc().isReturn()) + return false; + + // Avoid duplicating calls before register allocation. Calls presents a + // barrier to register allocation so duplicating them may end up increasing + // spills. + if (PreRegAlloc && I->getDesc().isCall()) + return false; + if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; + + if (InstrCount > MaxDuplicateCount) + return false; } - // Don't tail-duplicate calls before register allocation. Calls presents a - // barrier to register allocation so duplicating them may end up increasing - // spills. - if (InstrCount > 1 && (PreRegAlloc && HasCall)) + + if (HasIndirectbr && PreRegAlloc) + return true; + + if (IsSimple) + return true; + + if (!PreRegAlloc) + return true; + + return canCompletelyDuplicateBB(TailBB); +} + +/// isSimpleBB - True if this BB has only one unconditional jump. +bool +TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { + if (TailBB->succ_size() != 1) + return false; + if (TailBB->pred_empty()) return false; + MachineBasicBlock::iterator I = TailBB->begin(); + MachineBasicBlock::iterator E = TailBB->end(); + while (I != E && I->isDebugValue()) + ++I; + if (I == E) + return true; + return I->getDesc().isUnconditionalBranch(); +} + +static bool +bothUsedInPHI(const MachineBasicBlock &A, + SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { + for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), + SE = A.succ_end(); SI != SE; ++SI) { + MachineBasicBlock *BB = *SI; + if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) + return true; + } + + return false; +} + +bool +TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { + SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); + + for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), + PE = BB.pred_end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + if (PredBB->succ_size() > 1) + return false; + + MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + return false; + if (!PredCond.empty()) + return false; + } return true; } +bool +TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVector<MachineInstr*, 16> &Copies) { + SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), + TailBB->succ_end()); + SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + bool Changed = false; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + if (PredBB->getLandingPadSuccessor()) + continue; + + if (bothUsedInPHI(*PredBB, Succs)) + continue; + + MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + + Changed = true; + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From simple Succ: " << *TailBB); + + MachineBasicBlock *NewTarget = *TailBB->succ_begin(); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); + + // Make PredFBB explicit. + if (PredCond.empty()) + PredFBB = PredTBB; + + // Make fall through explicit. 
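isSimpleBB, introduced earlier in this hunk, is the gatekeeper for the new duplicateSimpleBB fast path, so its logic bears restating in isolation. This sketch mirrors it under an invented name (looksSimple) and assumes the same era APIs shown above.

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // A "simple" block computes nothing: it has one successor, at least
    // one predecessor, and contains only debug values followed by an
    // optional unconditional branch.
    static bool looksSimple(MachineBasicBlock *TailBB) {
      if (TailBB->succ_size() != 1 || TailBB->pred_empty())
        return false;
      MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
      while (I != E && I->isDebugValue())
        ++I;
      if (I == E)   // nothing but debug info; the block falls through
        return true;
      return I->getDesc().isUnconditionalBranch();
    }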
+ if (!PredTBB) + PredTBB = NextBB; + if (!PredFBB) + PredFBB = NextBB; + + // Redirect + if (PredFBB == TailBB) + PredFBB = NewTarget; + if (PredTBB == TailBB) + PredTBB = NewTarget; + + // Make the branch unconditional if possible + if (PredTBB == PredFBB) { + PredCond.clear(); + PredFBB = NULL; + } + + // Avoid adding fall through branches. + if (PredFBB == NextBB) + PredFBB = NULL; + if (PredTBB == NextBB && PredFBB == NULL) + PredTBB = NULL; + + TII->RemoveBranch(*PredBB); + + if (PredTBB) + TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + + PredBB->removeSuccessor(TailBB); + unsigned NumSuccessors = PredBB->succ_size(); + assert(NumSuccessors <= 1); + if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) + PredBB->addSuccessor(NewTarget); + + TDBBs.push_back(PredBB); + } + return Changed; +} + /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool -TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { - if (!shouldTailDuplicate(MF, *TailBB)) - return false; - DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); + + if (IsSimple) + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); + // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); - DenseSet<unsigned> UsedByPhi; - getRegsUsedByPHIs(*TailBB, &UsedByPhi); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; @@ -618,6 +797,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } + + // Simplify + TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 34e2b33..86e71d8 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -59,8 +59,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool NewMI) const { - const TargetInstrDesc &TID = MI->getDesc(); - bool HasDef = TID.getNumDefs(); + const MCInstrDesc &MCID = MI->getDesc(); + bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. return 0; @@ -81,7 +81,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool ChangeReg0 = false; if (HasDef && MI->getOperand(0).getReg() == Reg1) { // Must be two address instruction! 
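The normalize-redirect-refold sequence in the duplicateSimpleBB hunk above ("make fall through explicit", redirect, then fold degenerate branches away) reads most clearly as one self-contained helper. This is a sketch under the AnalyzeBranch/RemoveBranch/InsertBranch signatures used in the hunk; retargetBranch and its parameter names are illustrative only.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/Support/DebugLoc.h"
    #include "llvm/Target/TargetInstrInfo.h"
    using namespace llvm;

    // Rewrite PredBB's terminators so every edge to OldDest goes to
    // NewDest instead.  NextBB must be PredBB's layout successor.
    static bool retargetBranch(const TargetInstrInfo *TII,
                               MachineBasicBlock *PredBB,
                               MachineBasicBlock *NextBB,
                               MachineBasicBlock *OldDest,
                               MachineBasicBlock *NewDest) {
      MachineBasicBlock *TBB = 0, *FBB = 0;
      SmallVector<MachineOperand, 4> Cond;
      if (TII->AnalyzeBranch(*PredBB, TBB, FBB, Cond, true))
        return false;                       // unanalyzable; leave it alone

      if (Cond.empty())                     // make the false side explicit
        FBB = TBB;
      if (!TBB) TBB = NextBB;               // make fall through explicit
      if (!FBB) FBB = NextBB;

      if (TBB == OldDest) TBB = NewDest;    // redirect both edges
      if (FBB == OldDest) FBB = NewDest;

      if (TBB == FBB) {                     // both sides now agree, so the
        Cond.clear();                       // branch is unconditional
        FBB = 0;
      }
      if (FBB == NextBB)                    // re-suppress branches to the
        FBB = 0;                            // layout successor
      if (TBB == NextBB && FBB == 0)
        TBB = 0;

      TII->RemoveBranch(*PredBB);
      if (TBB)
        TII->InsertBranch(*PredBB, TBB, FBB, Cond, DebugLoc());
      return true;
    }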
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); Reg2IsKill = false; ChangeReg0 = true; @@ -119,12 +119,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isCommutable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isCommutable()) return false; // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this // is not true, then the target must implement this. - SrcOpIdx1 = TID.getNumDefs(); + SrcOpIdx1 = MCID.getNumDefs(); SrcOpIdx2 = SrcOpIdx1 + 1; if (!MI->getOperand(SrcOpIdx1).isReg() || !MI->getOperand(SrcOpIdx2).isReg()) @@ -137,12 +137,12 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { bool MadeChange = false; - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) { + if (MCID.OpInfo[i].isPredicate()) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { MO.setReg(Pred[j].getReg()); @@ -332,10 +332,10 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); // Avoid instructions obviously unsafe for remat. - if (TID.isNotDuplicable() || TID.mayStore() || + if (MCID.isNotDuplicable() || MCID.mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -345,7 +345,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. 
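The remat gate in the hunk above is driven almost entirely by instruction-description flags. A minimal sketch of that flag filter follows, with the isInvariantLoad(AA) query abstracted into a boolean parameter so the example stays self-contained; rematFlagsLookSafe is an invented name.

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCInstrDesc.h"
    using namespace llvm;

    // The purely flag-based half of the remat safety check: reject
    // anything that cannot be duplicated, writes memory, has unmodeled
    // side effects, or loads from memory we cannot prove invariant.
    static bool rematFlagsLookSafe(const MachineInstr *MI,
                                   bool LoadIsInvariant) {
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isNotDuplicable() || MCID.mayStore() ||
          MI->hasUnmodeledSideEffects())
        return false;
      if (MCID.mayLoad() && !LoadIsInvariant)
        return false;
      return true;
    }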
- if (TID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index cdac42d..a3c5620 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,6 +43,19 @@ using namespace dwarf; // ELF //===----------------------------------------------------------------------===// +TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() + : TargetLoweringObjectFile(), + TLSDataSection(0), + TLSBSSSection(0), + DataRelSection(0), + DataRelLocalSection(0), + DataRelROSection(0), + DataRelROLocalSection(0), + MergeableConst4Section(0), + MergeableConst8Section(0), + MergeableConst16Section(0) { +} + void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -189,8 +202,8 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, return Mang->getSymbol(GV); break; case dwarf::DW_EH_PE_pcrel: { - Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName(); - return getContext().GetOrCreateSymbol(FullName); + return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + Mang->getSymbol(GV)->getName()); break; } } @@ -199,13 +212,13 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, const MCSymbol *Sym) const { - Twine FullName = StringRef("DW.ref.") + Sym->getName(); - MCSymbol *Label = getContext().GetOrCreateSymbol(FullName); + SmallString<64> NameData("DW.ref."); + NameData += Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); - Twine SectionName = StringRef(".data.") + Label->getName(); - SmallString<64> NameData; - SectionName.toVector(NameData); + StringRef Prefix = ".data."; + NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; const MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, @@ -480,6 +493,27 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // MachO //===----------------------------------------------------------------------===// +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() + : TargetLoweringObjectFile(), + TLSDataSection(0), + TLSBSSSection(0), + TLSTLVSection(0), + TLSThreadInitSection(0), + CStringSection(0), + UStringSection(0), + TextCoalSection(0), + ConstTextCoalSection(0), + ConstDataSection(0), + DataCoalSection(0), + DataCommonSection(0), + DataBSSSection(0), + FourByteConstantSection(0), + EightByteConstantSection(0), + SixteenByteConstantSection(0), + LazySymbolPointerSection(0), + NonLazySymbolPointerSection(0) { +} + void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { IsFunctionEHFrameSymbolPrivate = false; @@ -605,6 +639,13 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, // Exception Handling. 
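The emitPersonalityValue change above is a lifetime fix, not a cosmetic one: a Twine only references its operands, so storing one in a named local (as the old code did) leaves it pointing at dead temporaries. The safe idiom is to materialize the concatenation into a SmallString within the same full expression, sketched here; buildPrefixedName is an illustrative helper, not an API in the tree.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    using namespace llvm;

    // Concatenate Prefix and Base into stable storage.  The Twine built
    // by operator+ is consumed before the end of the full expression, so
    // it never outlives the temporaries it references.
    static void buildPrefixedName(StringRef Prefix, StringRef Base,
                                  SmallVectorImpl<char> &Out) {
      Out.clear();
      (Prefix + Base).toVector(Out);
    }

    // Usage: SmallString<64> Name; buildPrefixedName("DW.ref.", SymName, Name);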
LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); + + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + CompactUnwindSection = + getContext().getMachOSection("__LD", "__compact_unwind", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getReadOnly()); + // Debug Information. DwarfAbbrevSection = getContext().getMachOSection("__DWARF", "__debug_abbrev", @@ -884,6 +925,13 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { // COFF //===----------------------------------------------------------------------===// +TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF() + : TargetLoweringObjectFile(), + DrectveSection(0), + PDataSection(0), + XDataSection(0) { +} + void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index f54d879..6d6244e 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -280,8 +280,8 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, /// isTwoAddrUse - Return true if the specified MI is using the specified /// register as a two-address operand. static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const TargetInstrDesc &TID = UseMI->getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MCInstrDesc &MCID = UseMI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(i); if (MO.isReg() && MO.getReg() == Reg && (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) @@ -443,8 +443,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); + const MCInstrDesc &MCID = MI.getDesc(); + unsigned NumOps = MI.isInlineAsm() + ? 
MI.getNumOperands() : MCID.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -761,10 +762,10 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall()) + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayStore() || MCID.isCall()) return false; - if (TID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -854,7 +855,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet<MachineInstr*, 8> &Processed) { - const TargetInstrDesc &TID = mi->getDesc(); + const MCInstrDesc &MCID = mi->getDesc(); unsigned regA = mi->getOperand(DstIdx).getReg(); unsigned regB = mi->getOperand(SrcIdx).getReg(); @@ -876,7 +877,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (TID.isCommutable() && mi->getNumOperands() >= 3 && + if (MCID.isCommutable() && mi->getNumOperands() >= 3 && TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; @@ -907,7 +908,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (TID.isConvertibleTo3Addr()) { + if (MCID.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -927,7 +928,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (TID.mayLoad() && !regBKilled) { + if (MCID.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = @@ -936,14 +937,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /*UnfoldStore=*/false, &LoadRegIndex); if (NewOpc != 0) { - const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); - if (UnfoldTID.getNumDefs() == 1) { + const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); + if (UnfoldMCID.getNumDefs() == 1) { MachineFunction &MF = *mbbi->getParent(); // Unfold the load. DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); const TargetRegisterClass *RC = - UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; if (!TII->unfoldMemoryOperand(MF, mi, Reg, @@ -1067,7 +1068,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const TargetInstrDesc &TID = mi->getDesc(); + const MCInstrDesc &MCID = mi->getDesc(); bool FirstTied = true; DistanceMap.insert(std::make_pair(mi, ++Dist)); @@ -1077,7 +1078,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. unsigned NumOps = mi->isInlineAsm() - ? 
mi->getNumOperands() : TID.getNumOperands(); + ? mi->getNumOperands() : MCID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) @@ -1095,12 +1096,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { "two address instruction invalid"); unsigned regB = mi->getOperand(SrcIdx).getReg(); - TiedOperandMap::iterator OI = TiedOperands.find(regB); - if (OI == TiedOperands.end()) { - SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; - OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; - } - OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); } // Now iterate over the information collected above. diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index ba50f4e..03abff3 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -208,6 +208,11 @@ namespace llvm { /// @brief returns the register allocation preference. unsigned getRegAllocPref(unsigned virtReg); + /// @brief returns true if VirtReg is assigned to its preferred physreg. + bool hasPreferredPhys(unsigned VirtReg) { + return getPhys(VirtReg) == getRegAllocPref(VirtReg); + } + /// @brief records virtReg is a split live interval from SReg. void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { Virt2SplitMap[virtReg] = SReg; diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 1850658..a5ec797 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -679,8 +679,8 @@ static void ReMaterialize(MachineBasicBlock &MBB, VirtRegMap &VRM) { MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); #ifndef NDEBUG - const TargetInstrDesc &TID = ReMatDefMI->getDesc(); - assert(TID.getNumDefs() == 1 && + const MCInstrDesc &MCID = ReMatDefMI->getDesc(); + assert(MCID.getNumDefs() == 1 && "Don't know how to remat instructions that define > 1 values!"); #endif TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); @@ -1483,11 +1483,11 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, /// where SrcReg is r1 and it is tied to r0. Return true if after /// commuting this instruction it will be r0 = op r2, r1. static bool CommuteChangesDestination(MachineInstr *DefMI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, unsigned SrcReg, const TargetInstrInfo *TII, unsigned &DstIdx) { - if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) return false; if (!DefMI->getOperand(1).isReg() || DefMI->getOperand(1).getReg() != SrcReg) @@ -1527,11 +1527,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr &MI = *MII; MachineBasicBlock::iterator DefMII = prior(MII); MachineInstr *DefMI = DefMII; - const TargetInstrDesc &TID = DefMI->getDesc(); + const MCInstrDesc &MCID = DefMI->getDesc(); unsigned NewDstIdx; if (DefMII != MBB->begin() && - TID.isCommutable() && - CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { + MCID.isCommutable() && + CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) @@ -1658,9 +1658,9 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, /// isSafeToDelete - Return true if this instruction doesn't produce any side /// effect and all of its defs are dead. 
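The TiedOperands change in this hunk leans on a DenseMap property the old code worked around by hand: operator[] default-constructs the mapped value on first access, so the find-then-insert dance is unnecessary. A sketch, with TiedPairList and recordTiedPair as illustrative names:

    #include <utility>
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;

    // operator[] creates an empty TiedPairList the first time RegB is
    // seen, so one line replaces the old find/insert sequence.
    static void recordTiedPair(DenseMap<unsigned, TiedPairList> &TiedOperands,
                               unsigned RegB, unsigned SrcIdx,
                               unsigned DstIdx) {
      TiedOperands[RegB].push_back(std::make_pair(SrcIdx, DstIdx));
    }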
static bool isSafeToDelete(MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() || - TID.isCall() || TID.isBarrier() || TID.isReturn() || + const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || + MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || MI.isLabel() || MI.isDebugValue() || MI.hasUnmodeledSideEffects()) return false;
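The isSafeToDelete rewrite that closes this diff is another pure flag filter, and it makes a reasonable template for "no observable effect" checks in general. A sketch mirroring the final hunk, under the invented name hasNoSideEffects:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCInstrDesc.h"
    using namespace llvm;

    // Conservatively true only when deleting MI cannot change observable
    // behavior: no memory traffic, no control flow, no labels or debug
    // values, no unmodeled side effects.
    static bool hasNoSideEffects(const MachineInstr &MI) {
      const MCInstrDesc &MCID = MI.getDesc();
      if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
          MCID.isCall() || MCID.isBarrier() || MCID.isReturn())
        return false;
      return !MI.isLabel() && !MI.isDebugValue() &&
             !MI.hasUnmodeledSideEffects();
    }

Note that, as in the pass itself, a caller must still verify that all defs are dead before actually erasing the instruction.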