| field     | value                                                 |                           |
|-----------|-------------------------------------------------------|---------------------------|
| author    | dim <dim@FreeBSD.org>                                 | 2012-08-15 19:34:23 +0000 |
| committer | dim <dim@FreeBSD.org>                                 | 2012-08-15 19:34:23 +0000 |
| commit    | 721c201bd55ffb73cb2ba8d39e0570fa38c44e15 (patch)      |                           |
| tree      | eacfc83d988e4b9d11114387ae7dc41243f2a363 /lib/CodeGen |                           |
| parent    | 2b2816e083a455f7a656ae88b0fd059d1688bb36 (diff)       |                           |
Vendor import of llvm trunk r161861:
http://llvm.org/svn/llvm-project/llvm/trunk@161861
Diffstat (limited to 'lib/CodeGen')
123 files changed, 10812 insertions, 7098 deletions
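Before the patch itself: the bulk of this import mechanically converts lib/CodeGen from the old sentinel-terminated `uint16_t` register lists (`getOverlaps`, `getAliasSet`, `getSubRegisters`, `getSuperRegisters`) to the MC-layer iterator classes (`MCRegAliasIterator`, `MCSubRegIterator`, `MCSuperRegIterator`). The sketch below contrasts the two idioms. It is illustrative only and not part of the patch; the function names `markAliasesOld`/`markAliasesNew` are invented for the example, and it assumes LLVM headers of roughly this vintage (the old accessors were being phased out around this time and do not compile against newer trees).

```cpp
// Illustrative sketch, not from the patch: the register-alias walk before
// and after the API change this import applies throughout lib/CodeGen.
#include "llvm/ADT/BitVector.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

// Old idiom: getOverlaps() yields a 0-terminated uint16_t array; the
// declaration in the loop condition doubles as the termination test.
static void markAliasesOld(unsigned Reg, const TargetRegisterInfo *TRI,
                           BitVector &Used) {
  for (const uint16_t *Alias = TRI->getOverlaps(Reg);
       unsigned R = *Alias; ++Alias)
    Used.set(R);
}

// New idiom: MCRegAliasIterator enumerates the same set; the third
// argument controls whether Reg itself is visited.
static void markAliasesNew(unsigned Reg, const TargetRegisterInfo *TRI,
                           BitVector &Used) {
  for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid(); ++AI)
    Used.set(*AI);
}
```

The iterator form lets the MC register tables drop the per-register sentinel arrays; `MCSubRegIterator` and `MCSuperRegIterator` follow the same `isValid()`/`operator++` pattern, which is why the conversion below is so repetitive.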
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 822a564..205480a 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,10 +16,10 @@
 #define DEBUG_TYPE "post-RA-sched"
 #include "AggressiveAntiDepBreaker.h"
-#include "RegisterClassInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -157,8 +157,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
     // In a return block, examine the function live-out regs.
     for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
          E = MRI.liveout_end(); I != E; ++I) {
-      for (const uint16_t *Alias = TRI->getOverlaps(*I);
-           unsigned Reg = *Alias; ++Alias) {
+      for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+        unsigned Reg = *AI;
         State->UnionGroups(Reg, 0);
         KillIndices[Reg] = BB->size();
         DefIndices[Reg] = ~0u;
@@ -173,8 +173,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
          SE = BB->succ_end(); SI != SE; ++SI)
     for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
          E = (*SI)->livein_end(); I != E; ++I) {
-      for (const uint16_t *Alias = TRI->getOverlaps(*I);
-           unsigned Reg = *Alias; ++Alias) {
+      for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+        unsigned Reg = *AI;
         State->UnionGroups(Reg, 0);
         KillIndices[Reg] = BB->size();
         DefIndices[Reg] = ~0u;
@@ -189,8 +189,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
     unsigned Reg = *I;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
-    for (const uint16_t *Alias = TRI->getOverlaps(Reg);
-         unsigned AliasReg = *Alias; ++Alias) {
+    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+      unsigned AliasReg = *AI;
       State->UnionGroups(AliasReg, 0);
       KillIndices[AliasReg] = BB->size();
       DefIndices[AliasReg] = ~0u;
@@ -265,10 +265,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
         IsImplicitDefUse(MI, MO)) {
       const unsigned Reg = MO.getReg();
       PassthruRegs.insert(Reg);
-      for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
-           *Subreg; ++Subreg) {
-        PassthruRegs.insert(*Subreg);
-      }
+      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+        PassthruRegs.insert(*SubRegs);
     }
   }
 }
@@ -333,9 +331,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
     DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
   }
   // Repeat for subregisters.
-  for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
-       *Subreg; ++Subreg) {
-    unsigned SubregReg = *Subreg;
+  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+    unsigned SubregReg = *SubRegs;
     if (!State->IsLive(SubregReg)) {
       KillIndices[SubregReg] = KillIdx;
       DefIndices[SubregReg] = ~0u;
@@ -392,8 +389,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
       // Any aliased that are live at this point are completely or
       // partially defined here, so group those aliases with Reg.
-      for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        unsigned AliasReg = *Alias;
+      for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+        unsigned AliasReg = *AI;
         if (State->IsLive(AliasReg)) {
           State->UnionGroups(Reg, AliasReg);
           DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
@@ -404,7 +401,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
     // Note register reference...
     const TargetRegisterClass *RC = NULL;
     if (i < MI->getDesc().getNumOperands())
-      RC = TII->getRegClass(MI->getDesc(), i, TRI);
+      RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
     AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
     RegRefs.insert(std::make_pair(Reg, RR));
   }
@@ -423,9 +420,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
       continue;
     // Update def for Reg and aliases.
-    for (const uint16_t *Alias = TRI->getOverlaps(Reg);
-         unsigned AliasReg = *Alias; ++Alias)
-      DefIndices[AliasReg] = Count;
+    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+      DefIndices[*AI] = Count;
   }
 }
@@ -479,7 +475,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
     // Note register reference...
     const TargetRegisterClass *RC = NULL;
     if (i < MI->getDesc().getNumOperands())
-      RC = TII->getRegClass(MI->getDesc(), i, TRI);
+      RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
     AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
     RegRefs.insert(std::make_pair(Reg, RR));
   }
@@ -678,9 +674,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
         goto next_super_reg;
       } else {
         bool found = false;
-        for (const uint16_t *Alias = TRI->getAliasSet(NewReg);
-             *Alias; ++Alias) {
-          unsigned AliasReg = *Alias;
+        for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+          unsigned AliasReg = *AI;
           if (State->IsLive(AliasReg) ||
               (KillIndices[Reg] > DefIndices[AliasReg])) {
             DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 87f6431..32ad34a 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -15,9 +15,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "AllocationOrder.h"
-#include "RegisterClassInfo.h"
 #include "VirtRegMap.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
 
 using namespace llvm;
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 00874d4..447f398 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -203,6 +203,63 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
   }
 }
 
+
+/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
+/// through it (and any transitive noop operands to it) and return its input
+/// value.  This is used to determine if a tail call can be formed.
+///
+static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
+  // If V is not an instruction, it can't be looked through.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
+
+  Value *Op = I->getOperand(0);
+
+  // Look through truly no-op truncates.
+  if (isa<TruncInst>(I) &&
+      TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
+    return getNoopInput(I->getOperand(0), TLI);
+
+  // Look through truly no-op bitcasts.
+  if (isa<BitCastInst>(I)) {
+    // No type change at all.
+    if (Op->getType() == I->getType())
+      return getNoopInput(Op, TLI);
+
+    // Pointer to pointer cast.
+    if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
+      return getNoopInput(Op, TLI);
+
+    if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
+        TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
+        TLI.isTypeLegal(EVT::getEVT(I->getType())))
+      return getNoopInput(Op, TLI);
+  }
+
+  // Look through inttoptr.
+  if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
+    // Make sure this isn't a truncating or extending cast.  We could support
+    // this eventually, but don't bother for now.
+    if (TLI.getPointerTy().getSizeInBits() ==
+        cast<IntegerType>(Op->getType())->getBitWidth())
+      return getNoopInput(Op, TLI);
+  }
+
+  // Look through ptrtoint.
+  if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
+    // Make sure this isn't a truncating or extending cast.  We could support
+    // this eventually, but don't bother for now.
+    if (TLI.getPointerTy().getSizeInBits() ==
+        cast<IntegerType>(I->getType())->getBitWidth())
+      return getNoopInput(Op, TLI);
+  }
+
+
+  // Otherwise it's not something we can look through.
+  return V;
+}
+
+
 /// Test if the given instruction is in a position to be optimized
 /// with a tail-call. This roughly means that it's in a block with
 /// a return and there's nothing that needs to be scheduled
@@ -226,7 +283,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
   // been fully understood.
   if (!Ret &&
       (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
-       !isa<UnreachableInst>(Term))) return false;
+       !isa<UnreachableInst>(Term)))
+    return false;
 
   // If I will have a chain, make sure no other instruction that will have a
   // chain interposes between I and the return.
@@ -264,28 +322,28 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
     return false;
 
   // Otherwise, make sure the unmodified return value of I is the return value.
-  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
-       U = dyn_cast<Instruction>(U->getOperand(0))) {
-    if (!U)
-      return false;
-    if (!U->hasOneUse())
+  // We handle two cases: multiple return values + scalars.
+  Value *RetVal = Ret->getOperand(0);
+  if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
+    // Handle scalars first.
+    return getNoopInput(Ret->getOperand(0), TLI) == I;
+
+  // If this is an aggregate return, look through the insert/extract values and
+  // see if each is transparent.
+  for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
+       i != e; ++i) {
+    const Value *InScalar = FindInsertedValue(RetVal, i);
+    if (InScalar == 0)
       return false;
+    InScalar = getNoopInput(InScalar, TLI);
+
+    // If the scalar value being inserted is an extractvalue of the right index
+    // from the call, then everything is good.
+    const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
+    if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
+        EVI->getIndices()[0] != i)
       return false;
-    if (U == I)
-      break;
-    // Check for a truly no-op truncate.
-    if (isa<TruncInst>(U) &&
-        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
-      continue;
-    // Check for a truly no-op bitcast.
-    if (isa<BitCastInst>(U) &&
-        (U->getOperand(0)->getType() == U->getType() ||
-         (U->getOperand(0)->getType()->isPointerTy() &&
-          U->getType()->isPointerTy())))
-      continue;
-    // Otherwise it's not a true no-op.
-    return false;
   }
-
+
   return true;
 }
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index b60fda8..bf5d8c4 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -44,9 +44,7 @@ EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
 
 ARMException::ARMException(AsmPrinter *A)
-  : DwarfException(A),
-    shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
-    {}
+  : DwarfException(A) {}
 
 ARMException::~ARMException() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b0b2ff4..d9be7a1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -15,6 +15,7 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "DwarfDebug.h"
 #include "DwarfException.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -24,7 +25,6 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -475,10 +475,8 @@ void AsmPrinter::EmitFunctionHeader() {
 void AsmPrinter::EmitFunctionEntryLabel() {
   // The function label could have already been emitted if two symbols end up
   // conflicting due to asm renaming. Detect this and emit an error.
-  if (CurrentFnSym->isUndefined()) {
-    OutStreamer.ForceCodeRegion();
+  if (CurrentFnSym->isUndefined())
     return OutStreamer.EmitLabel(CurrentFnSym);
-  }
 
   report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
                      "' label emitted multiple times to assembly file");
@@ -615,7 +613,7 @@ bool AsmPrinter::needsSEHMoves() {
 }
 
 bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
-  return MAI->doesDwarfUseRelocationsForStringPool();
+  return MAI->doesDwarfUseRelocationsAcrossSections();
 }
 
 void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
@@ -798,8 +796,8 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
   int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
 
-  for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg());
-       *SR && Reg < 0; ++SR) {
+  for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
+       ++SR) {
     Reg = TRI->getDwarfRegNum(*SR, false);
     // FIXME: Get the bit range this register uses of the superregister
     // so that we can produce a DW_OP_bit_piece
@@ -1085,15 +1083,6 @@ void AsmPrinter::EmitJumpTableInfo() {
     EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
 
-  // If we know the form of the jump table, go ahead and tag it as such.
-  if (!JTInDiffSection) {
-    if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
-      OutStreamer.EmitJumpTable32Region();
-    } else {
-      OutStreamer.EmitDataRegion();
-    }
-  }
-
   for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1399,13 +1388,14 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
                                      unsigned Size)
   const {
-  // Emit Label+Offset
-  const MCExpr *Plus =
-    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
-                            MCConstantExpr::Create(Offset, OutContext),
-                            OutContext);
+  // Emit Label+Offset (or just Label if Offset is zero)
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+  if (Offset)
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(Offset, OutContext),
+                                   OutContext);
 
-  OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+  OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/);
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index d605854..db43b06 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -326,11 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
             OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
           }
 
-        // We may have a location metadata attached to the end of the
-        // instruction, and at no point should see metadata at any
-        // other point while processing. It's an error if so.
+          // We may have a location metadata attached to the end of the
+          // instruction, and at no point should see metadata at any
+          // other point while processing. It's an error if so.
           if (OpNo >= MI->getNumOperands() ||
-            MI->getOperand(OpNo).isMetadata()) {
+              MI->getOperand(OpNo).isMetadata()) {
             Error = true;
           } else {
             unsigned OpFlags = MI->getOperand(OpNo).getImm();
@@ -409,9 +409,28 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
 /// instruction, using the specified assembler variant.  Targets should
 /// override this to format as appropriate.
 bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                 unsigned AsmVariant, const char *ExtraCode,
-                                 raw_ostream &O) {
-  // Target doesn't support this yet!
+                                 unsigned AsmVariant, const char *ExtraCode,
+                                 raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    const MachineOperand &MO = MI->getOperand(OpNo);
+    switch (ExtraCode[0]) {
+    default:
+      return true;  // Unknown modifier.
+    case 'c': // Substitute immediate value without immediate syntax
+      if (MO.getType() != MachineOperand::MO_Immediate)
+        return true;
+      O << MO.getImm();
+      return false;
+    case 'n':  // Negate the immediate constant.
+      if (MO.getType() != MachineOperand::MO_Immediate)
+        return true;
+      O << -MO.getImm();
+      return false;
+    }
+  }
   return true;
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index cc5b642..d30e5bb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains support for writing dwarf compile unit.
+// This file contains support for constructing a dwarf compile unit.
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,9 +17,9 @@
 #include "DwarfCompileUnit.h"
 #include "DwarfDebug.h"
 #include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
-#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
@@ -33,7 +33,7 @@ using namespace llvm;
 
 /// CompileUnit - Compile unit constructor.
 CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
-                         DwarfDebug *DW)
+                         DwarfDebug *DW)
   : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
   DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
 }
@@ -198,7 +198,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
     return;
   DIFile File = Ty.getFile();
   unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
-                                            File.getDirectory());
+                                            File.getDirectory());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -308,7 +308,8 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
       addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
       addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
     } else if (Element == DIBuilder::OpDeref) {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+      if (!Location.isReg())
+        addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
     } else llvm_unreachable("unknown DIBuilder Opcode");
   }
 
@@ -418,27 +419,12 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
 
   // Decode the original location, and use that as the start of the byref
   // variable's location.
-  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
-  if (Location.isReg()) {
-    if (Reg < 32)
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
-    else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-  } else {
-    if (Reg < 32)
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
-    else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-
-    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
-  }
+  if (Location.isReg())
+    addRegisterOp(Block, Location.getReg());
+  else
+    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
 
   // If we started with a pointer to the __Block_byref... struct, then
   // the first thing we need to do is dereference the pointer (DW_OP_deref).
@@ -646,8 +632,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
 }
 
 /// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty,
-                          unsigned Attribute) {
+void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
   if (!Ty.Verify())
     return;
 
@@ -776,6 +761,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
           Buffer.addChild(ElemDie);
         }
       }
+      DIType DTy = CTy.getTypeDerivedFrom();
+      if (DTy.Verify()) {
+        addType(&Buffer, DTy);
+        addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
+      }
     }
     break;
   case dwarf::DW_TAG_subroutine_type: {
@@ -801,9 +791,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       // Add prototype flag if we're dealing with a C language and the
       // function has been prototyped.
       if (isPrototyped &&
-          (Language == dwarf::DW_LANG_C89 ||
-           Language == dwarf::DW_LANG_C99 ||
-           Language == dwarf::DW_LANG_ObjC))
+          (Language == dwarf::DW_LANG_C89 ||
+           Language == dwarf::DW_LANG_C99 ||
+           Language == dwarf::DW_LANG_ObjC))
         addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
     }
     break;
@@ -846,19 +836,19 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
         addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
         addSourceLine(ElemDie, DV);
       } else if (Element.isDerivedType()) {
-        DIDerivedType DDTy(Element);
-        if (DDTy.getTag() == dwarf::DW_TAG_friend) {
-          ElemDie = new DIE(dwarf::DW_TAG_friend);
-          addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
-        } else
-          ElemDie = createMemberDIE(DIDerivedType(Element));
+        DIDerivedType DDTy(Element);
+        if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+          ElemDie = new DIE(dwarf::DW_TAG_friend);
+          addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+        } else
+          ElemDie = createMemberDIE(DIDerivedType(Element));
       } else if (Element.isObjCProperty()) {
         DIObjCProperty Property(Element);
         ElemDie = new DIE(Property.getTag());
         StringRef PropertyName = Property.getObjCPropertyName();
         addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
-        addType(ElemDie, Property.getType());
-        addSourceLine(ElemDie, Property);
+        addType(ElemDie, Property.getType());
+        addSourceLine(ElemDie, Property);
         StringRef GetterName = Property.getObjCPropertyGetterName();
         if (!GetterName.empty())
           addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
@@ -925,19 +915,21 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     if (!Name.empty())
       addString(&Buffer, dwarf::DW_AT_name, Name);
 
-    if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
-        || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
-    {
+    if (Tag == dwarf::DW_TAG_enumeration_type ||
+        Tag == dwarf::DW_TAG_class_type ||
+        Tag == dwarf::DW_TAG_structure_type ||
+        Tag == dwarf::DW_TAG_union_type) {
       // Add size if non-zero (derived types might be zero-sized.)
+      // TODO: Do we care about size for enum forward declarations?
       if (Size)
         addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
-      else {
+      else if (!CTy.isForwardDecl())
         // Add zero size if it is not a forward declaration.
-        if (CTy.isForwardDecl())
-          addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-        else
-          addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
-      }
+        addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+
+      // If we're a forward decl, say so.
+      if (CTy.isForwardDecl())
+        addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
 
       // Add source line info if available.
       if (!CTy.isForwardDecl())
@@ -968,7 +960,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
 
 /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
 /// for the given DITemplateValueParameter.
 DIE *
-CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
   DIE *ParamDIE = getDIE(TPV);
   if (ParamDIE)
     return ParamDIE;
@@ -1015,17 +1007,17 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   if (SPDie)
     return SPDie;
 
+  SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+  // DW_TAG_inlined_subroutine may refer to this DIE.
+  insertDIE(SP, SPDie);
+
   DISubprogram SPDecl = SP.getFunctionDeclaration();
   DIE *DeclDie = NULL;
   if (SPDecl.isSubprogram()) {
     DeclDie = getOrCreateSubprogramDIE(SPDecl);
   }
 
-  SPDie = new DIE(dwarf::DW_TAG_subprogram);
-
-  // DW_TAG_inlined_subroutine may refer to this DIE.
-  insertDIE(SP, SPDie);
-
   // Add to context owner.
   addToContextOwner(SPDie, SP.getContext());
 
@@ -1240,7 +1232,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
 }
 
 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
+                                       DIE *IndexTy) {
   DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
   addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
   uint64_t L = SR.getLo();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 45e407e..b4ff9e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,7 +15,7 @@
 #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
 
 #include "DIE.h"
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index cb78878..649684a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -17,9 +17,10 @@
 #include "DwarfAccelTable.h"
 #include "DwarfCompileUnit.h"
 #include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DIBuilder.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -32,11 +33,10 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -117,7 +117,6 @@ DIType DbgVariable::getType() const {
       if (getName() == DT.getName())
         return (DT.getTypeDerivedFrom());
     }
-    return Ty;
   }
   return Ty;
 }
@@ -127,6 +126,7 @@
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   : Asm(A), MMI(Asm->MMI), FirstCU(0),
     AbbreviationsSet(InitAbbreviationsSetSize),
+    SourceIdMap(DIEValueAllocator),
    StringPool(DIEValueAllocator),
     PrevLabel(NULL) {
   NextStringPoolNumber = 0;
@@ -566,7 +566,7 @@ CompileUnit
 *DwarfDebug::constructCompileUnit(const MDNode *N) {
   NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
-  if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
                     Asm->GetTempSymbol("section_line"));
   else
@@ -1310,8 +1310,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
              MOE = MI->operands_end(); MOI != MOE; ++MOI) {
           if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
             continue;
-          for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg());
-               unsigned Reg = *AI; ++AI) {
+          for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
+               AI.isValid(); ++AI) {
+            unsigned Reg = *AI;
             const MDNode *Var = LiveUserVar[Reg];
             if (!Var)
               continue;
@@ -1381,7 +1382,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
                                        MF->getFunction()->getContext());
     recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
                      FnStartDL.getScope(MF->getFunction()->getContext()),
-                     0);
+                     DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
   }
 }
 
@@ -1421,6 +1422,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
       DIVariable DV(Variables.getElement(i));
       if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
         continue;
+      // Check that DbgVariable for DV wasn't created earlier, when
+      // findAbstractVariable() was called for inlined instance of DV.
+      LLVMContext &Ctx = DV->getContext();
+      DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
+      if (AbstractVariables.lookup(CleanDV))
+        continue;
      if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
        addScopeVariable(Scope, new DbgVariable(DV, NULL));
     }
@@ -1623,7 +1630,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
       // DW_AT_range Value encodes offset in debug_range section.
       DIEInteger *V = cast<DIEInteger>(Values[i]);
 
-      if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) {
+      if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
         Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
                                  V->getValue(),
                                  4);
@@ -1636,10 +1643,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
       break;
     }
     case dwarf::DW_AT_location: {
-      if (DIELabel *L = dyn_cast<DIELabel>(Values[i]))
-        Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
-      else
+      if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
+        if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+          Asm->EmitLabelReference(L->getValue(), 4);
+        else
+          Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+      } else {
         Values[i]->EmitValue(Asm, Form);
+      }
       break;
     }
     case dwarf::DW_AT_accessibility: {
@@ -2049,9 +2060,11 @@ void DwarfDebug::emitDebugLoc() {
           if (Element == DIBuilder::OpPlus) {
             Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
             Asm->EmitULEB128(DV.getAddrElement(++i));
-          } else if (Element == DIBuilder::OpDeref)
-            Asm->EmitInt8(dwarf::DW_OP_deref);
-          else llvm_unreachable("unknown Opcode found in complex address");
+          } else if (Element == DIBuilder::OpDeref) {
+            if (!Entry.Loc.isReg())
+              Asm->EmitInt8(dwarf::DW_OP_deref);
+          } else
+            llvm_unreachable("unknown Opcode found in complex address");
         }
       }
     }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 83f30f5..d1d6512 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,11 +14,11 @@
 #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 
+#include "DIE.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/MC/MachineLocation.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "DIE.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -188,6 +188,9 @@ class DwarfDebug {
   /// MMI - Collected machine module information.
   MachineModuleInfo *MMI;
 
+  /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
   //===--------------------------------------------------------------------===//
   // Attributes used to construct specific Dwarf sections.
   //
@@ -210,11 +213,11 @@ class DwarfDebug {
 
   /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
   /// separated by a zero byte, mapped to a unique id.
-  StringMap<unsigned> SourceIdMap;
+  StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
 
   /// StringPool - A String->Symbol mapping of strings used by indirect
   /// references.
-  StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+  StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
   unsigned NextStringPoolNumber;
 
   /// SectionMap - Provides a unique id per text section.
@@ -232,7 +235,7 @@ class DwarfDebug {
   /// ScopeVariables - Collection of dbg variables of a scope.
   DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
 
-  /// AbstractVariables - Collection on abstract variables.
+  /// AbstractVariables - Collection of abstract variables.
   DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
 
   /// DotDebugLocEntries - Collection of DotDebugLocEntry.
@@ -292,9 +295,6 @@ class DwarfDebug {
 
   std::vector<FunctionDebugFrameInfo> DebugFrames;
 
-  // DIEValueAllocator - All DIEValues are allocated through this allocator.
-  BumpPtrAllocator DIEValueAllocator;
-
   // Section Symbols: these are assembler temporary labels that are emitted at
   // the beginning of each supported dwarf section.  These are used to form
   // section offsets and are created by EmitSectionLabels.
@@ -333,9 +333,6 @@ private:
   /// of the function.
   DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
 
-  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-  DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S);
-
   /// constructScopeDIE - Construct a DIE for this scope.
   DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
 
@@ -517,9 +514,6 @@ public:
   /// in the SourceIds map.
   unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
 
-  /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(DISubprogram SP);
-
   /// getStringPool - returns the entry into the start of the pool.
   MCSymbol *getStringPool();
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b5f86ab..75f6056 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -175,17 +175,6 @@ public:
 };
 
 class ARMException : public DwarfException {
-  /// shouldEmitTable - Per-function flag to indicate if EH tables should
-  /// be emitted.
-  bool shouldEmitTable;
-
-  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
-  /// should be emitted.
-  bool shouldEmitMoves;
-
-  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
-  /// should be emitted.
-  bool shouldEmitTableModule;
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index ef1d2ba..fb65bb7 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -137,9 +137,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
       break;
     unsigned Reg = I->getOperand(0).getReg();
     ImpDefRegs.insert(Reg);
-    for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
-         unsigned SubReg = *SubRegs; ++SubRegs)
-      ImpDefRegs.insert(SubReg);
+    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+      ImpDefRegs.insert(*SubRegs);
     ++I;
   }
   if (ImpDefRegs.empty())
@@ -188,7 +187,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
 
   // Use a RegScavenger to help update liveness when required.
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF))
+  if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
     RS = new RegScavenger();
   else
     MRI.invalidateLiveness();
@@ -819,10 +818,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
 }
 
 bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
-
-  if (!EnableTailMerge) return false;
-
   bool MadeChange = false;
+  if (!EnableTailMerge) return MadeChange;
 
   // First find blocks with no successors.
   MergePotentials.clear();
@@ -839,6 +836,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
     if (MergePotentials.size() == TailMergeThreshold)
       for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
        TriedMerging.insert(MergePotentials[i].getBlock());
+
   // See if we can do any tail merging on those.
   if (MergePotentials.size() >= 2)
     MadeChange |= TryTailMergeBlocks(NULL, NULL);
 
@@ -864,88 +862,97 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
 
   for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
        I != E; ++I) {
-    if (I->pred_size() >= 2) {
-      SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
-      MachineBasicBlock *IBB = I;
-      MachineBasicBlock *PredBB = prior(I);
-      MergePotentials.clear();
-      for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
-                                            E2 = I->pred_end();
-           P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
-        MachineBasicBlock *PBB = *P;
-        if (TriedMerging.count(PBB))
-          continue;
-        // Skip blocks that loop to themselves, can't tail merge these.
-        if (PBB == IBB)
-          continue;
-        // Visit each predecessor only once.
-        if (!UniquePreds.insert(PBB))
-          continue;
-        // Skip blocks which may jump to a landing pad. Can't tail merge these.
-        if (PBB->getLandingPadSuccessor())
-          continue;
-        MachineBasicBlock *TBB = 0, *FBB = 0;
-        SmallVector<MachineOperand, 4> Cond;
-        if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
-          // Failing case: IBB is the target of a cbr, and
-          // we cannot reverse the branch.
-          SmallVector<MachineOperand, 4> NewCond(Cond);
-          if (!Cond.empty() && TBB == IBB) {
-            if (TII->ReverseBranchCondition(NewCond))
+    if (I->pred_size() < 2) continue;
+    SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+    MachineBasicBlock *IBB = I;
+    MachineBasicBlock *PredBB = prior(I);
+    MergePotentials.clear();
+    for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+                                          E2 = I->pred_end();
+         P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+      MachineBasicBlock *PBB = *P;
+      if (TriedMerging.count(PBB))
+        continue;
+
+      // Skip blocks that loop to themselves, can't tail merge these.
+      if (PBB == IBB)
+        continue;
+
+      // Visit each predecessor only once.
+      if (!UniquePreds.insert(PBB))
+        continue;
+
+      // Skip blocks which may jump to a landing pad. Can't tail merge these.
+      if (PBB->getLandingPadSuccessor())
+        continue;
+
+      MachineBasicBlock *TBB = 0, *FBB = 0;
+      SmallVector<MachineOperand, 4> Cond;
+      if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+        // Failing case: IBB is the target of a cbr, and we cannot reverse the
+        // branch.
+        SmallVector<MachineOperand, 4> NewCond(Cond);
+        if (!Cond.empty() && TBB == IBB) {
+          if (TII->ReverseBranchCondition(NewCond))
+            continue;
+          // This is the QBB case described above
+          if (!FBB)
+            FBB = llvm::next(MachineFunction::iterator(PBB));
+        }
+
+        // Failing case: the only way IBB can be reached from PBB is via
+        // exception handling.  Happens for landing pads.  Would be nice to have
+        // a bit in the edge so we didn't have to do all this.
+        if (IBB->isLandingPad()) {
+          MachineFunction::iterator IP = PBB; IP++;
+          MachineBasicBlock *PredNextBB = NULL;
+          if (IP != MF.end())
+            PredNextBB = IP;
+          if (TBB == NULL) {
+            if (IBB != PredNextBB)      // fallthrough
+              continue;
+          } else if (FBB) {
+            if (TBB != IBB && FBB != IBB)   // cbr then ubr
+              continue;
+          } else if (Cond.empty()) {
+            if (TBB != IBB)               // ubr
+              continue;
+          } else {
+            if (TBB != IBB && IBB != PredNextBB)  // cbr
              continue;
-            // This is the QBB case described above
-            if (!FBB)
-              FBB = llvm::next(MachineFunction::iterator(PBB));
-          }
-          // Failing case: the only way IBB can be reached from PBB is via
-          // exception handling.  Happens for landing pads.  Would be nice
-          // to have a bit in the edge so we didn't have to do all this.
-          if (IBB->isLandingPad()) {
-            MachineFunction::iterator IP = PBB; IP++;
-            MachineBasicBlock *PredNextBB = NULL;
-            if (IP != MF.end())
-              PredNextBB = IP;
-            if (TBB == NULL) {
-              if (IBB != PredNextBB)      // fallthrough
-                continue;
-            } else if (FBB) {
-              if (TBB != IBB && FBB != IBB)   // cbr then ubr
-                continue;
-            } else if (Cond.empty()) {
-              if (TBB != IBB)               // ubr
-                continue;
-            } else {
-              if (TBB != IBB && IBB != PredNextBB)  // cbr
-                continue;
-            }
-          }
-          // Remove the unconditional branch at the end, if any.
-          if (TBB && (Cond.empty() || FBB)) {
-            DebugLoc dl;  // FIXME: this is nowhere
-            TII->RemoveBranch(*PBB);
-            if (!Cond.empty())
-              // reinsert conditional branch only, for now
-              TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
           }
-          MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
         }
+
+        // Remove the unconditional branch at the end, if any.
+        if (TBB && (Cond.empty() || FBB)) {
+          DebugLoc dl;  // FIXME: this is nowhere
+          TII->RemoveBranch(*PBB);
+          if (!Cond.empty())
+            // reinsert conditional branch only, for now
+            TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+        }
+
+        MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
       }
-      // If this is a large problem, avoid visiting the same basic blocks
-      // multiple times.
-      if (MergePotentials.size() == TailMergeThreshold)
-        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
-          TriedMerging.insert(MergePotentials[i].getBlock());
-      if (MergePotentials.size() >= 2)
-        MadeChange |= TryTailMergeBlocks(IBB, PredBB);
-      // Reinsert an unconditional branch if needed.
-      // The 1 below can occur as a result of removing blocks in
-      // TryTailMergeBlocks.
-      PredBB = prior(I);      // this may have been changed in TryTailMergeBlocks
-      if (MergePotentials.size() == 1 &&
-          MergePotentials.begin()->getBlock() != PredBB)
-        FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
     }
+
+    // If this is a large problem, avoid visiting the same basic blocks multiple
+    // times.
+    if (MergePotentials.size() == TailMergeThreshold)
+      for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+        TriedMerging.insert(MergePotentials[i].getBlock());
+
+    if (MergePotentials.size() >= 2)
+      MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+
+    // Reinsert an unconditional branch if needed. The 1 below can occur as a
+    // result of removing blocks in TryTailMergeBlocks.
+    PredBB = prior(I);      // this may have been changed in TryTailMergeBlocks
+    if (MergePotentials.size() == 1 &&
+        MergePotentials.begin()->getBlock() != PredBB)
+      FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
   }
+
   return MadeChange;
 }
 
@@ -1459,7 +1466,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
 }
 
 /// findHoistingInsertPosAndDeps - Find the location to move common instructions
-/// in successors to. The location is ususally just before the terminator,
+/// in successors to. The location is usually just before the terminator,
 /// however if the terminator is a conditional branch and its previous
 /// instruction is the flag setting instruction, the previous instruction is
 /// the preferred location.
/// This function also gathers uses and defs of the
@@ -1483,9 +1490,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
     if (!Reg)
       continue;
     if (MO.isUse()) {
-      Uses.insert(Reg);
-      for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
-        Uses.insert(*AS);
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        Uses.insert(*AI);
     } else if (!MO.isDead())
       // Don't try to hoist code in the rare case the terminator defines a
      // register that is later used.
@@ -1545,18 +1551,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
       if (!Reg)
         continue;
       if (MO.isUse()) {
-        Uses.insert(Reg);
-        for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
-          Uses.insert(*AS);
+        for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+          Uses.insert(*AI);
       } else {
         if (Uses.count(Reg)) {
           Uses.erase(Reg);
-          for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
-            Uses.erase(*SR); // Use getSubRegisters to be conservative
+          for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+            Uses.erase(*SubRegs); // Use sub-registers to be conservative
         }
-        Defs.insert(Reg);
-        for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
-          Defs.insert(*AS);
+        for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+          Defs.insert(*AI);
       }
     }
 
@@ -1683,8 +1687,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
       unsigned Reg = MO.getReg();
       if (!Reg || !LocalDefsSet.count(Reg))
         continue;
-      for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
-        LocalDefsSet.erase(*OR);
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        LocalDefsSet.erase(*AI);
     }
 
     // Track local defs so we can update liveins.
@@ -1696,8 +1700,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
       if (!Reg)
        continue;
       LocalDefs.push_back(Reg);
-      for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
-        LocalDefsSet.insert(*OR);
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        LocalDefsSet.insert(*AI);
     }
 
     HasDups = true;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 21729cd..2e189ad 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMCodeGen
   DeadMachineInstructionElim.cpp
   DFAPacketizer.cpp
   DwarfEHPrepare.cpp
+  EarlyIfConversion.cpp
   EdgeBundles.cpp
   ExecutionDepsFix.cpp
   ExpandISelPseudos.cpp
@@ -30,6 +31,7 @@ add_llvm_library(LLVMCodeGen
   LiveInterval.cpp
   LiveIntervalAnalysis.cpp
   LiveIntervalUnion.cpp
+  LiveRegMatrix.cpp
   LiveStackAnalysis.cpp
   LiveVariables.cpp
   LiveRangeCalc.cpp
@@ -59,6 +61,7 @@ add_llvm_library(LLVMCodeGen
   MachineSSAUpdater.cpp
   MachineScheduler.cpp
   MachineSink.cpp
+  MachineTraceMetrics.cpp
   MachineVerifier.cpp
   OcamlGC.cpp
   OptimizePHIs.cpp
@@ -77,8 +80,8 @@ add_llvm_library(LLVMCodeGen
   RegAllocPBQP.cpp
   RegisterClassInfo.cpp
   RegisterCoalescer.cpp
+  RegisterPressure.cpp
   RegisterScavenging.cpp
-  RenderMachineFunction.cpp
   ScheduleDAG.cpp
   ScheduleDAGInstrs.cpp
   ScheduleDAGPrinter.cpp
@@ -103,5 +106,7 @@ add_llvm_library(LLVMCodeGen
   VirtRegMap.cpp
   )
 
+add_dependencies(LLVMCodeGen intrinsics_gen)
+
 add_subdirectory(SelectionDAG)
 add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index ea16a25..939af3f 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -39,18 +39,20 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
   MachineFunctionPass::getAnalysisUsage(au);
 }
-bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
                << "********** Function: "
-               << fn.getFunction()->getName() << '\n');
-
-  LiveIntervals &lis = getAnalysis<LiveIntervals>();
-  VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
-  for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
-    LiveInterval &li = *I->second;
-    if (TargetRegisterInfo::isVirtualRegister(li.reg))
-      vrai.CalculateWeightAndHint(li);
+               << MF.getFunction()->getName() << '\n');
+
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>());
+  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (MRI.reg_nodbg_empty(Reg))
+      continue;
+    VRAI.CalculateWeightAndHint(LIS.getInterval(Reg));
   }
   return false;
 }
@@ -86,6 +88,27 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
   return tri.getMatchingSuperReg(hreg, sub, rc);
 }
 
+// Check if all values in LI are rematerializable
+static bool isRematerializable(const LiveInterval &LI,
+                               const LiveIntervals &LIS,
+                               const TargetInstrInfo &TII) {
+  for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+       I != E; ++I) {
+    const VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    if (VNI->isPHIDef())
+      return false;
+
+    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+    assert(MI && "Dead valno in interval");
+
+    if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
+      return false;
+  }
+  return true;
+}
+
 void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
   MachineRegisterInfo &mri = MF.getRegInfo();
   const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
@@ -171,17 +194,11 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
   }
 
   // If all of the definitions of the interval are re-materializable,
-  // it is a preferred candidate for spilling. If none of the defs are
-  // loads, then it's potentially very cheap to re-materialize.
+  // it is a preferred candidate for spilling.
   // FIXME: this gets much more complicated once we support non-trivial
   // re-materialization.
-  bool isLoad = false;
-  if (LIS.isReMaterializable(li, 0, isLoad)) {
-    if (isLoad)
-      totalWeight *= 0.9F;
-    else
-      totalWeight *= 0.5F;
-  }
+  if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+    totalWeight *= 0.5F;
 
   li.weight = normalizeSpillWeight(totalWeight, li.getSize());
 }
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 2b7dfdb..0b747fd 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -49,8 +49,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
   Size = MinSize;
   if (MinAlign > (int)Align)
     Align = MinAlign;
-  if (MF.getFrameInfo()->getMaxAlignment() < Align)
-    MF.getFrameInfo()->setMaxAlignment(Align);
+  MF.getFrameInfo()->ensureMaxAlignment(Align);
   TM.getTargetLowering()->HandleByVal(this, Size);
   unsigned Offset = AllocateStack(Size, Align);
   addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
@@ -58,9 +57,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
 
 /// MarkAllocated - Mark a register and all of its aliases as allocated.
 void CCState::MarkAllocated(unsigned Reg) {
-  for (const uint16_t *Alias = TRI.getOverlaps(Reg);
-       unsigned Reg = *Alias; ++Alias)
-    UsedRegs[Reg/32] |= 1 << (Reg&31);
+  for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+    UsedRegs[*AI/32] |= 1 << (*AI&31);
 }
 
 /// AnalyzeFormalArguments - Analyze an array of argument values,
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a81bb5c..fb2c2e8 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeCalculateSpillWeightsPass(Registry);
   initializeCodePlacementOptPass(Registry);
   initializeDeadMachineInstructionElimPass(Registry);
+  initializeEarlyIfConverterPass(Registry);
   initializeExpandPostRAPass(Registry);
   initializeExpandISelPseudosPass(Registry);
   initializeFinalizeMachineBundlesPass(Registry);
@@ -53,7 +54,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeProcessImplicitDefsPass(Registry);
   initializePEIPass(Registry);
   initializeRegisterCoalescerPass(Registry);
-  initializeRenderMachineFunctionPass(Registry);
   initializeSlotIndexesPass(Registry);
   initializeStackProtectorPass(Registry);
   initializeStackSlotColoringPass(Registry);
@@ -65,7 +65,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeUnreachableBlockElimPass(Registry);
   initializeUnreachableMachineBlockElimPass(Registry);
   initializeVirtRegMapPass(Registry);
+  initializeVirtRegRewriterPass(Registry);
   initializeLowerIntrinsicsPass(Registry);
+  initializeMachineFunctionPrinterPassPass(Registry);
 }
 
 void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index c13c05e..99233df 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -201,7 +201,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
       // fallthrough edge.
       if (!Prior->isSuccessor(End))
         goto next_pred;
-      // Otherwise we can stop scanning and procede to move the blocks.
+      // Otherwise we can stop scanning and proceed to move the blocks.
       break;
     }
     // If we hit a switch or something complicated, don't move anything
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index bad5010..a9de1c749 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -62,17 +62,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
     // In a return block, examine the function live-out regs.
     for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
          E = MRI.liveout_end(); I != E; ++I) {
-      unsigned Reg = *I;
-      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      KillIndices[Reg] = BBSize;
-      DefIndices[Reg] = ~0u;
-
-      // Repeat, for all aliases.
-      for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        unsigned AliasReg = *Alias;
-        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-        KillIndices[AliasReg] = BBSize;
-        DefIndices[AliasReg] = ~0u;
+      for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+        unsigned Reg = *AI;
+        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        KillIndices[Reg] = BBSize;
+        DefIndices[Reg] = ~0u;
       }
     }
   }
@@ -84,17 +78,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
          SE = BB->succ_end(); SI != SE; ++SI)
     for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
          E = (*SI)->livein_end(); I != E; ++I) {
-      unsigned Reg = *I;
-      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      KillIndices[Reg] = BBSize;
-      DefIndices[Reg] = ~0u;
-
-      // Repeat, for all aliases.
-      for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        unsigned AliasReg = *Alias;
-        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-        KillIndices[AliasReg] = BBSize;
-        DefIndices[AliasReg] = ~0u;
+      for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+        unsigned Reg = *AI;
+        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        KillIndices[Reg] = BBSize;
+        DefIndices[Reg] = ~0u;
       }
     }
 
@@ -104,18 +92,12 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   BitVector Pristine = MFI->getPristineRegs(BB);
   for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
-    unsigned Reg = *I;
-    if (!IsReturnBlock && !Pristine.test(Reg)) continue;
-    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-    KillIndices[Reg] = BBSize;
-    DefIndices[Reg] = ~0u;
-
-    // Repeat, for all aliases.
-    for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-      unsigned AliasReg = *Alias;
-      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      KillIndices[AliasReg] = BBSize;
-      DefIndices[AliasReg] = ~0u;
+    if (!IsReturnBlock && !Pristine.test(*I)) continue;
+    for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+      unsigned Reg = *AI;
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      KillIndices[Reg] = BBSize;
+      DefIndices[Reg] = ~0u;
     }
   }
 }
@@ -208,7 +190,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
 
     const TargetRegisterClass *NewRC = 0;
     if (i < MI->getDesc().getNumOperands())
-      NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+      NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -218,11 +200,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
       Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
 
     // Now check for aliases.
-    for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+    for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
       // If an alias of the reg is used during the live range, give up.
       // Note that this allows us to skip checking if AntiDepReg
       // overlaps with any of the aliases, among other things.
-      unsigned AliasReg = *Alias;
+      unsigned AliasReg = *AI;
       if (Classes[AliasReg]) {
         Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
         Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -236,9 +218,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
     if (MO.isUse() && Special) {
       if (!KeepRegs.test(Reg)) {
         KeepRegs.set(Reg);
-        for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
-             *Subreg; ++Subreg)
-          KeepRegs.set(*Subreg);
+        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+          KeepRegs.set(*SubRegs);
       }
     }
   }
@@ -247,7 +228,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
                                              unsigned Count) {
   // Update liveness.
-  // Proceding upwards, registers that are defed but not used in this
+  // Proceeding upwards, registers that are defed but not used in this
   // instruction are now dead.
 
   if (!TII->isPredicated(MI)) {
@@ -282,9 +263,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
       Classes[Reg] = 0;
       RegRefs.erase(Reg);
       // Repeat, for all subregs.
-      for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
-           *Subreg; ++Subreg) {
-        unsigned SubregReg = *Subreg;
+      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+        unsigned SubregReg = *SubRegs;
         DefIndices[SubregReg] = Count;
         KillIndices[SubregReg] = ~0u;
         KeepRegs.reset(SubregReg);
@@ -292,11 +272,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
         RegRefs.erase(SubregReg);
       }
       // Conservatively mark super-registers as unusable.
-      for (const uint16_t *Super = TRI->getSuperRegisters(Reg);
-           *Super; ++Super) {
-        unsigned SuperReg = *Super;
-        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      }
+      for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+        Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
     }
   }
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -308,7 +285,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 
     const TargetRegisterClass *NewRC = 0;
     if (i < MI->getDesc().getNumOperands())
-      NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+      NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -328,8 +305,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
            "Kill and Def maps aren't consistent for Reg!");
     }
     // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; DefIndices[AliasReg] = ~0u; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 7746259..ad95c48 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -17,11 +17,11 @@ #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" #include <map> diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 5ff641c..ff2f113 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,10 +23,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" using namespace llvm; @@ -100,22 +100,23 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { reserveResources(&MID); } -namespace { +namespace llvm { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - MachineDominatorTree &MDT, bool IsPostRA); + MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. void schedule(); }; -} // end anonymous namespace +} DefaultVLIWScheduler::DefaultVLIWScheduler( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA) : ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { + CanHandleTerminators = true; } void DefaultVLIWScheduler::schedule() { @@ -129,49 +130,25 @@ VLIWPacketizerList::VLIWPacketizerList( bool IsPostRA) : TM(MF.getTarget()), MF(MF) { TII = TM.getInstrInfo(); ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); - SchedulerImpl = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); } // VLIWPacketizerList Dtor VLIWPacketizerList::~VLIWPacketizerList() { - delete SchedulerImpl; - delete ResourceTracker; -} - -// ignorePseudoInstruction - ignore pseudo instructions. -bool VLIWPacketizerList::ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) { - if (MI->isDebugValue()) - return true; - - if (TII->isSchedulingBoundary(MI, MBB, MF)) - return true; - - return false; -} - -// isSoloInstruction - return true if instruction I must end previous -// packet. -bool VLIWPacketizerList::isSoloInstruction(MachineInstr *I) { - if (I->isInlineAsm()) - return true; - - return false; -} + if (VLIWScheduler) + delete VLIWScheduler; -// addToPacket - Add I to the current packet and reserve resource. 
-void VLIWPacketizerList::addToPacket(MachineInstr *MI) { - CurrentPacketMIs.push_back(MI); - ResourceTracker->reserveResources(MI); + if (ResourceTracker) + delete ResourceTracker; } // endPacket - End the current packet, bundle packet instructions and reset // DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, - MachineInstr *I) { + MachineInstr *MI) { if (CurrentPacketMIs.size() > 1) { MachineInstr *MIFirst = CurrentPacketMIs.front(); - finalizeBundle(*MBB, MIFirst, I); + finalizeBundle(*MBB, MIFirst, MI); } CurrentPacketMIs.clear(); ResourceTracker->clearResources(); @@ -181,31 +158,35 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr) { - assert(MBB->end() == EndItr && "Bad EndIndex"); - - SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size()); - - // Build the DAG without reordering instructions. - SchedulerImpl->schedule(); - - // Remember scheduling units. - SUnits = SchedulerImpl->SUnits; + assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); + VLIWScheduler->startBlock(MBB); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->schedule(); + + // Generate MI -> SU map. + MIToSUnit.clear(); + for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { + SUnit *SU = &VLIWScheduler->SUnits[i]; + MIToSUnit[SU->getInstr()] = SU; + } // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { MachineInstr *MI = BeginItr; - // Ignore pseudo instructions. - if (ignorePseudoInstruction(MI, MBB)) - continue; + this->initPacketizerState(); // End the current packet if needed. - if (isSoloInstruction(MI)) { + if (this->isSoloInstruction(MI)) { endPacket(MBB, MI); continue; } - SUnit *SUI = SchedulerImpl->getSUnit(MI); + // Ignore pseudo instructions. + if (this->ignorePseudoInstruction(MI, MBB)) + continue; + + SUnit *SUI = MIToSUnit[MI]; assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. @@ -215,13 +196,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); VI != VE; ++VI) { MachineInstr *MJ = *VI; - SUnit *SUJ = SchedulerImpl->getSUnit(MJ); + SUnit *SUJ = MIToSUnit[MJ]; assert(SUJ && "Missing SUnit Info!"); // Is it legal to packetize SUI and SUJ together. - if (!isLegalToPacketizeTogether(SUI, SUJ)) { + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { // Allow packetization if dependency can be pruned. - if (!isLegalToPruneDependencies(SUI, SUJ)) { + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { // End the packet if dependency cannot be pruned. endPacket(MBB, MI); break; @@ -234,11 +215,11 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, } // Add MI to the current packet. - addToPacket(MI); + BeginItr = this->addToPacket(MI); } // For all instructions in BB. // End any packet left behind. 
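The packetizer loop above now dispatches through virtual hooks (isSoloInstruction, ignorePseudoInstruction, isLegalToPacketizeTogether, isLegalToPruneDependencies, addToPacket) so targets refine the policy. A hedged sketch of a target subclass; MyPacketizer is an invented name, and the constructor and hook signatures are assumed from the base-class calls visible in the loop:

#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

class MyPacketizer : public VLIWPacketizerList {
public:
  MyPacketizer(MachineFunction &MF, MachineLoopInfo &MLI,
               MachineDominatorTree &MDT)
    : VLIWPacketizerList(MF, MLI, MDT, /*IsPostRA=*/true) {}

  // Inline asm cannot share a packet; ending the packet first is always safe.
  virtual bool isSoloInstruction(MachineInstr *MI) {
    return MI->isInlineAsm();
  }
  // Consult target encoding/resource rules here; refusing is conservative.
  virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
    return false;
  }
};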
endPacket(MBB, EndItr); - - SchedulerImpl->exitRegion(); + VLIWScheduler->exitRegion(); + VLIWScheduler->finishBlock(); } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index aa10d1d..b4394e8 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -171,9 +171,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) - LivePhysRegs.reset(*SubRegs); + for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + LivePhysRegs.reset(*SR); } } else if (MO.isRegMask()) { // Register mask of preserved registers. All clobbers are dead. @@ -187,10 +186,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (MO.isReg() && MO.isUse()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - LivePhysRegs.set(Reg); - for (const uint16_t *AliasSet = TRI->getAliasSet(Reg); - *AliasSet; ++AliasSet) - LivePhysRegs.set(*AliasSet); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LivePhysRegs.set(*AI); } } } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 944dd4f..7095624 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -39,7 +39,7 @@ namespace { Constant *RewindFunction; bool InsertUnwindResumeCalls(Function &Fn); - Instruction *GetExceptionObject(ResumeInst *RI); + Value *GetExceptionObject(ResumeInst *RI); public: static char ID; // Pass identification, replacement for typeid. @@ -68,9 +68,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { /// GetExceptionObject - Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead /// instructions, including the 'resume' instruction. -Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { +Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { Value *V = RI->getOperand(0); - Instruction *ExnObj = 0; + Value *ExnObj = 0; InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V); LoadInst *SelLoad = 0; InsertValueInst *ExcIVI = 0; @@ -81,7 +81,7 @@ Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0)); if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) && ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { - ExnObj = cast<Instruction>(ExcIVI->getOperand(1)); + ExnObj = ExcIVI->getOperand(1); SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1)); EraseIVIs = true; } @@ -139,7 +139,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { // _Unwind_Resume to the end of the single resume block. ResumeInst *RI = Resumes.front(); BasicBlock *UnwindBB = RI->getParent(); - Instruction *ExnObj = GetExceptionObject(RI); + Value *ExnObj = GetExceptionObject(RI); // Call the _Unwind_Resume function. 
CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); @@ -162,7 +162,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); - Instruction *ExnObj = GetExceptionObject(RI); + Value *ExnObj = GetExceptionObject(RI); PN->addIncoming(ExnObj, Parent); ++NumResumesLowered; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp new file mode 100644 index 0000000..f9347ef --- /dev/null +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -0,0 +1,803 @@ +//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Early if-conversion is for out-of-order CPUs that don't have a lot of +// predicable instructions. The goal is to eliminate conditional branches that +// may mispredict. +// +// Instructions from both sides of the branch are executed speculatively, and a +// cmov instruction selects the result. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" +#include "llvm/Function.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Absolute maximum number of instructions allowed per speculated block. +// This bypasses all other heuristics, so it should be set fairly high. +static cl::opt<unsigned> +BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, + cl::desc("Maximum number of instructions per speculated block.")); + +// Stress testing mode - disable heuristics. +static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, + cl::desc("Turn all knobs to 11")); + +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); + +//===----------------------------------------------------------------------===// +// SSAIfConv +//===----------------------------------------------------------------------===// +// +// The SSAIfConv class performs if-conversion on SSA form machine code after +// determining if it is possible. The class contains no heuristics; external +// code should be used to determine when if-conversion is a good idea.
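As intuition for the shapes this class handles (diagrammed just below), a diamond is what ordinary two-sided conditional code compiles to. A tiny standalone C++ example, independent of the pass itself:

// pick() lowers to: Head tests c, TBB computes a*3, FBB computes b+1, and
// the join block carries a phi for x. SSAIfConv speculates both sides into
// Head and turns that phi into a select/cmov.
int pick(bool c, int a, int b) {
  int x;
  if (c)
    x = a * 3;  // TBB
  else
    x = b + 1;  // FBB
  return x;     // Tail: phi(x) becomes a select
}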
+// +// SSAIfConv can convert both triangles and diamonds: +// +// Triangle: Head Diamond: Head +// | \ / \_ +// | \ / | +// | [TF]BB FBB TBB +// | / \ / +// | / \ / +// Tail Tail +// +// Instructions in the conditional blocks TBB and/or FBB are spliced into the +// Head block, and phis in the Tail block are converted to select instructions. +// +namespace { +class SSAIfConv { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + +public: + /// The block containing the conditional branch. + MachineBasicBlock *Head; + + /// The block containing phis after the if-then-else. + MachineBasicBlock *Tail; + + /// The 'true' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *TBB; + + /// The 'false' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *FBB; + + /// isTriangle - When there is no 'else' block, either TBB or FBB will be + /// equal to Tail. + bool isTriangle() const { return TBB == Tail || FBB == Tail; } + + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + + /// Information about each phi in the Tail block. + struct PHIInfo { + MachineInstr *PHI; + unsigned TReg, FReg; + // Latencies from Cond+Branch, TReg, and FReg to DstReg. + int CondCycles, TCycles, FCycles; + + PHIInfo(MachineInstr *phi) + : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} + }; + + SmallVector<PHIInfo, 8> PHIs; + +private: + /// The branch condition determined by AnalyzeBranch. + SmallVector<MachineOperand, 4> Cond; + + /// Instructions in Head that define values used by the conditional blocks. + /// The hoisted instructions must be inserted after these instructions. + SmallPtrSet<MachineInstr*, 8> InsertAfter; + + /// Register units clobbered by the conditional blocks. + BitVector ClobberedRegUnits; + + // Scratch pad for findInsertionPoint. + SparseSet<unsigned> LiveRegUnits; + + /// Insertion point in Head for speculatively executed instructions from TBB + /// and FBB. + MachineBasicBlock::iterator InsertionPoint; + + /// Return true if all non-terminator instructions in MBB can be safely + /// speculated. + bool canSpeculateInstrs(MachineBasicBlock *MBB); + + /// Find a valid insertion point in Head. + bool findInsertionPoint(); + + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + +public: + /// runOnMachineFunction - Initialize per-function data structures. + void runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LiveRegUnits.clear(); + LiveRegUnits.setUniverse(TRI->getNumRegUnits()); + ClobberedRegUnits.clear(); + ClobberedRegUnits.resize(TRI->getNumRegUnits()); + } + + /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, + /// initialize the internal state, and return true. + bool canConvertIf(MachineBasicBlock *MBB); + + /// convertIf - If-convert the last block passed to canConvertIf(), assuming + /// it is possible. Add any erased blocks to RemovedBlocks. + void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks); +}; +} // end anonymous namespace + + +/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely +/// be speculated.
The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); I != E; ++I) { + if (I->isDebugValue()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + DEBUG(dbgs() << "Can't hoist: " << *I); + return false; + } + + // Don't speculate loads. Note that it may be possible and desirable to + // speculate GOT or constant pool loads that are guaranteed not to trap, + // but we don't support that for now. + if (I->mayLoad()) { + DEBUG(dbgs() << "Won't speculate load: " << *I); + return false; + } + + // We never speculate stores, so an AA pointer isn't necessary. + bool DontMoveAcrossStore = true; + if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { + DEBUG(dbgs() << "Can't speculate: " << *I); + return false; + } + + // Check for any dependencies on Head instructions. + for (MIOperands MO(I); MO.isValid(); ++MO) { + if (MO->isRegMask()) { + DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + + // Remember clobbered regunits. + if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + ClobberedRegUnits.set(*Units); + + if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI)) + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); + if (DefMI->isTerminator()) { + DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; + } + } + } + return true; +} + + +/// Find an insertion point in Head for the speculated instructions. The +/// insertion point must be: +/// +/// 1. Before any terminators. +/// 2. After any instructions in InsertAfter. +/// 3. Not have any clobbered regunits live. +/// +/// This function sets InsertionPoint and returns true when successful; it +/// returns false if no valid insertion point could be found. +/// +bool SSAIfConv::findInsertionPoint() { + // Keep track of live regunits before the current position. + // Only track RegUnits that are also in ClobberedRegUnits. + LiveRegUnits.clear(); + SmallVector<unsigned, 8> Reads; + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + MachineBasicBlock::iterator I = Head->end(); + MachineBasicBlock::iterator B = Head->begin(); + while (I != B) { + --I; + // Some of the conditional code depends on I.
+ if (InsertAfter.count(I)) { + DEBUG(dbgs() << "Can't insert code after " << *I); + return false; + } + + // Update live regunits. + for (MIOperands MO(I); MO.isValid(); ++MO) { + // We're ignoring regmask operands. That is conservatively correct. + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // I clobbers Reg, so it isn't live before I. + if (MO->isDef()) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + LiveRegUnits.erase(*Units); + // Unless I reads Reg. + if (MO->readsReg()) + Reads.push_back(Reg); + } + // Anything read by I is live before I. + while (!Reads.empty()) + for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid(); + ++Units) + if (ClobberedRegUnits.test(*Units)) + LiveRegUnits.insert(*Units); + + // We can't insert before a terminator. + if (I != FirstTerm && I->isTerminator()) + continue; + + // Some of the clobbered registers are live before I, not a valid insertion + // point. + if (!LiveRegUnits.empty()) { + DEBUG({ + dbgs() << "Would clobber"; + for (SparseSet<unsigned>::const_iterator + i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) + dbgs() << ' ' << PrintRegUnit(*i, TRI); + dbgs() << " live before " << *I; + }); + continue; + } + + // This is a valid insertion point. + InsertionPoint = I; + DEBUG(dbgs() << "Can insert before " << *I); + return true; + } + DEBUG(dbgs() << "No legal insertion point found.\n"); + return false; +} + + + +/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is +/// a potential candidate for if-conversion. Fill out the internal state. +/// +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { + Head = MBB; + TBB = FBB = Tail = 0; + + if (Head->succ_size() != 2) + return false; + MachineBasicBlock *Succ0 = Head->succ_begin()[0]; + MachineBasicBlock *Succ1 = Head->succ_begin()[1]; + + // Canonicalize so Succ0 has MBB as its single predecessor. + if (Succ0->pred_size() != 1) + std::swap(Succ0, Succ1); + + if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) + return false; + + Tail = Succ0->succ_begin()[0]; + + // This is not a triangle. + if (Tail != Succ1) { + // Check for a diamond. We won't deal with any critical edges. + if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || + Succ1->succ_begin()[0] != Tail) + return false; + DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << "/BB#" << Succ1->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + + // Live-in physregs are tricky to get right when speculating code. + if (!Tail->livein_empty()) { + DEBUG(dbgs() << "Tail has live-ins.\n"); + return false; + } + } else { + DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + } + + // This is a triangle or a diamond. + // If Tail doesn't have any phis, there must be side effects. + if (Tail->empty() || !Tail->front().isPHI()) { + DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + + // The branch we're looking to eliminate must be analyzable. + Cond.clear(); + if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { + DEBUG(dbgs() << "Branch not analyzable.\n"); + return false; + } + + // This is weird, probably some sort of degenerate CFG. + if (!TBB) { + DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + return false; + } + + // AnalyzeBranch doesn't set FBB on a fall-through branch. + // Make sure it is always set. 
+ FBB = TBB == Succ0 ? Succ1 : Succ0; + + // Any phis in the tail block must be convertible to selects. + PHIs.clear(); + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); + for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); + I != E && I->isPHI(); ++I) { + PHIs.push_back(&*I); + PHIInfo &PI = PHIs.back(); + // Find PHI operands corresponding to TPred and FPred. + for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { + if (PI.PHI->getOperand(i+1).getMBB() == TPred) + PI.TReg = PI.PHI->getOperand(i).getReg(); + if (PI.PHI->getOperand(i+1).getMBB() == FPred) + PI.FReg = PI.PHI->getOperand(i).getReg(); + } + assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + + // Get target information. + if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, + PI.CondCycles, PI.TCycles, PI.FCycles)) { + DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + return false; + } + } + + // Check that the conditional instructions can be speculated. + InsertAfter.clear(); + ClobberedRegUnits.reset(); + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + + // Try to find a valid insertion point for the speculated instructions in the + // head basic block. + if (!findInsertionPoint()) + return false; + + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; + return true; +} + +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands."); + unsigned DstReg = PI.PHI->getOperand(0).getReg(); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + PI.PHI->eraseFromParent(); + PI.PHI = 0; + } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. 
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); + + // Fix up the CFG, temporarily leave Head without any successors. + Head->removeSuccessor(TBB); + Head->removeSuccessor(FBB); + if (TBB != Tail) + TBB->removeSuccessor(Tail); + if (FBB != Tail) + FBB->removeSuccessor(Tail); + + // Fix up Head's terminators. + // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); + TII->RemoveBranch(*Head); + + // Erase the now empty conditional blocks. It is likely that Head can fall + // through to Tail, and we can join the two blocks. + if (TBB != Tail) { + RemovedBlocks.push_back(TBB); + TBB->eraseFromParent(); + } + if (FBB != Tail) { + RemovedBlocks.push_back(FBB); + FBB->eraseFromParent(); + } + + assert(Head->succ_empty() && "Additional head successors?"); + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { + // Splice Tail onto the end of Head. + DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() + << " into head BB#" << Head->getNumber() << '\n'); + Head->splice(Head->end(), Tail, + Tail->begin(), Tail->end()); + Head->transferSuccessorsAndUpdatePHIs(Tail); + RemovedBlocks.push_back(Tail); + Tail->eraseFromParent(); + } else { + // We need a branch to Tail, let code placement work it out later. 
+ DEBUG(dbgs() << "Converting to unconditional branch.\n"); + SmallVector<MachineOperand, 0> EmptyCond; + TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL); + Head->addSuccessor(Tail); + } + DEBUG(dbgs() << *Head); +} + + +//===----------------------------------------------------------------------===// +// EarlyIfConverter Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfConverter : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfConverter() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &MF); + +private: + bool tryConvertIf(MachineBasicBlock*); + void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); + void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +char EarlyIfConverter::ID = 0; +char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) + +void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Update the dominator tree after if-conversion erased some blocks. +void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) { + // convertIf can remove TBB, FBB, and Tail can be merged into Head. + // TBB and FBB should not dominate any blocks. + // Tail children should be transferred to Head. + MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); + for (unsigned i = 0, e = Removed.size(); i != e; ++i) { + MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + assert(Node != HeadNode && "Cannot erase the head node"); + while (Node->getNumChildren()) { + assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); + DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); + } + DomTree->eraseNode(Removed[i]); + } +} + +/// Update LoopInfo after if-conversion. +void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { + if (!Loops) + return; + // If-conversion doesn't change loop structure, and it doesn't mess with back + // edges, so updating LoopInfo is simply removing the dead blocks. + for (unsigned i = 0, e = Removed.size(); i != e; ++i) + Loops->removeBlock(Removed[i]); +} + +/// Invalidate MachineTraceMetrics before if-conversion. 
+void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. + SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. 
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { + // If-convert MBB and update analyses. + invalidateTraces(); + SmallVector<MachineBasicBlock*, 4> RemovedBlocks; + IfConv.convertIf(RemovedBlocks); + Changed = true; + updateDomTree(RemovedBlocks); + updateLoops(RemovedBlocks); + } + return Changed; +} + +bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" + << "********** Function: " + << ((Value*)MF.getFunction())->getName() << '\n'); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; + MRI = &MF.getRegInfo(); + DomTree = &getAnalysis<MachineDominatorTree>(); + Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. + for (po_iterator<MachineDominatorTree*> + I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) + if (tryConvertIf(I->getBlock())) + Changed = true; + + MF.verify(this, "After early if-conversion"); + return Changed; +} diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index a48c540..fee8e47 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -59,7 +59,7 @@ struct DomainValue { // Pointer to the next DomainValue in a chain. When two DomainValues are // merged, Victim.Next is set to point to Victor, so old DomainValue - // references can be updated by folowing the chain. + // references can be updated by following the chain. DomainValue *Next; // Twiddleable instructions using or defining these registers. @@ -666,7 +666,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // or -1. AliasMap.resize(TRI->getNumRegs(), -1); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); + AI.isValid(); ++AI) AliasMap[*AI] = i; } diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index b14afc2..7a17331 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + // Transfer the kill/dead flags, if needed. 
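Stepping back from the hunks above: the cost model in shouldConvertIf leans on the small saturating helper adjCycles. Its behavior is easy to sanity-check in isolation; a minimal standalone demo (mirrors the definition above):

#include <cassert>

// Negative deltas that would underflow the unsigned cycle count clamp to 0:
// unsigned wraparound makes Cyc + Delta compare greater than Cyc.
static unsigned adjCycles(unsigned Cyc, int Delta) {
  if (Delta < 0 && Cyc + Delta > Cyc)
    return 0;
  return Cyc + Delta;
}

int main() {
  assert(adjCycles(5, -2) == 3);  // ordinary subtraction
  assert(adjCycles(1, -4) == 0);  // saturates instead of wrapping to ~0u
  assert(adjCycles(3, 2) == 5);   // positive deltas are plain addition
  return 0;
}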
if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); + DEBUG(dbgs() << "subreg: " << *CopyMI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 75ae5b9..4214ba1 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -155,7 +156,9 @@ namespace { const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; const MachineBranchProbabilityInfo *MBPI; + MachineRegisterInfo *MRI; + bool PreRegAlloc; bool MadeChange; int FnNum; public: @@ -263,14 +266,20 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MRI = &MF.getRegInfo(); InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; - // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false); - bool BFChange = BF.OptimizeFunction(MF, TII, + PreRegAlloc = MRI->isSSA(); + + bool BFChange = false; + if (!PreRegAlloc) { + // Tail merge tend to expose more if-conversion opportunities. + BranchFolder BF(true, false); + BFChange = BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); + } DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" << MF.getFunction()->getName() << "\'"); @@ -621,7 +630,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (BBI.IsDone) return; - bool AlreadyPredicated = BBI.Predicate.size() > 0; + bool AlreadyPredicated = !BBI.Predicate.empty(); // First analyze the end of BB branches. BBI.TrueBB = BBI.FalseBB = NULL; BBI.BrCond.clear(); @@ -786,8 +795,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, unsigned Dups = 0; unsigned Dups2 = 0; - bool TNeedSub = TrueBBI.Predicate.size() > 0; - bool FNeedSub = FalseBBI.Predicate.size() > 0; + bool TNeedSub = !TrueBBI.Predicate.empty(); + bool FNeedSub = !FalseBBI.Predicate.empty(); bool Enqueued = false; BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); @@ -962,9 +971,8 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; Redefs.insert(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - Redefs.insert(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Redefs.insert(*SubRegs); } } @@ -983,8 +991,8 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, Defs.push_back(Reg); else if (MO.isKill()) { Redefs.erase(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - Redefs.erase(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Redefs.erase(*SubRegs); } } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { @@ -993,11 +1001,12 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, if (AddImpUse) // Treat predicated update as read + write. 
MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, - true/*IsImp*/,false/*IsKill*/)); + true/*IsImp*/,false/*IsKill*/, + false/*IsDead*/,true/*IsUndef*/)); } else { Redefs.insert(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - Redefs.insert(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Redefs.insert(*SubRegs); } } } @@ -1335,8 +1344,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // These are defined before ctrl flow reach the 'false' instructions. // They cannot be modified by the 'true' instructions. ExtUses.insert(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - ExtUses.insert(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + ExtUses.insert(*SubRegs); } } @@ -1344,8 +1353,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, unsigned Reg = Defs[i]; if (!ExtUses.count(Reg)) { RedefsByFalse.insert(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - RedefsByFalse.insert(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RedefsByFalse.insert(*SubRegs); } } } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index d5ea666..07e37af 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -52,7 +52,6 @@ static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, namespace { class InlineSpiller : public Spiller { - MachineFunctionPass &Pass; MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; @@ -137,8 +136,7 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : Pass(pass), - MF(mf), + : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), LSS(pass.getAnalysis<LiveStacks>()), AA(&pass.getAnalysis<AliasAnalysis>()), @@ -578,11 +576,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); - assert(SrcLR && "Copy from non-existing value"); + LiveRangeQuery SrcQ(SrcLI, VNI->def); + assert(SrcQ.valueIn() && "Copy from non-existing value"); // Check if this COPY kills its source. - SVI->second.KillsSource = (SrcLR->end == VNI->def); - VNInfo *SrcVNI = SrcLR->valno; + SVI->second.KillsSource = SrcQ.isKill(); + VNInfo *SrcVNI = SrcQ.valueIn(); DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' << SrcVNI->id << '@' << SrcVNI->def << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); @@ -1083,6 +1081,10 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); + // Some (out-of-tree) targets have EC reload instructions. 
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) + if (MO->isEarlyClobber()) + LoadIdx = LoadIdx.getRegSlot(true); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); @@ -1275,8 +1277,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\nFrom original " - << LIS.getInterval(Original) << '\n'); + << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() + << "\nFrom original " << LIS.getInterval(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 8368b58..1541bf0 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -39,7 +39,7 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { unsigned E = PhysRegEntries[PhysReg]; if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { if (!Entries[E].valid(LIUArray, TRI)) - Entries[E].revalidate(); + Entries[E].revalidate(LIUArray, TRI); return &Entries[E]; } // No valid entry exists, pick the next round-robin entry. @@ -61,13 +61,15 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { } /// revalidate - LIU contents have changed, update tags. -void InterferenceCache::Entry::revalidate() { +void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { // Invalidate all block entries. ++Tag; // Invalidate all iterators. PrevPos = SlotIndex(); - for (unsigned i = 0, e = Aliases.size(); i != e; ++i) - Aliases[i].second = Aliases[i].first->getTag(); + unsigned i = 0; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) + RegUnits[i].VirtTag = LIUArray[*Units].getTag(); } void InterferenceCache::Entry::reset(unsigned physReg, @@ -79,28 +81,23 @@ void InterferenceCache::Entry::reset(unsigned physReg, ++Tag; PhysReg = physReg; Blocks.resize(MF->getNumBlockIDs()); - Aliases.clear(); - for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { - LiveIntervalUnion *LIU = LIUArray + *AS; - Aliases.push_back(std::make_pair(LIU, LIU->getTag())); - } // Reset iterators. PrevPos = SlotIndex(); - unsigned e = Aliases.size(); - Iters.resize(e); - for (unsigned i = 0; i != e; ++i) - Iters[i].setMap(Aliases[i].first->getMap()); + RegUnits.clear(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + RegUnits.push_back(LIUArray[*Units]); + RegUnits.back().Fixed = &LIS->getRegUnit(*Units); + } } bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI) { - unsigned i = 0, e = Aliases.size(); - for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { - LiveIntervalUnion *LIU = LIUArray + *AS; - if (i == e || Aliases[i].first != LIU) + unsigned i = 0, e = RegUnits.size(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) { + if (i == e) return false; - if (LIU->changedSince(Aliases[i].second)) + if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag)) return false; } return i == e; @@ -112,12 +109,20 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Use advanceTo only when possible. 
if (PrevPos != Start) { - if (!PrevPos.isValid() || Start < PrevPos) - for (unsigned i = 0, e = Iters.size(); i != e; ++i) - Iters[i].find(Start); - else - for (unsigned i = 0, e = Iters.size(); i != e; ++i) - Iters[i].advanceTo(Start); + if (!PrevPos.isValid() || Start < PrevPos) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.find(Start); + RUI.FixedI = RUI.Fixed->find(Start); + } + } else { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.advanceTo(Start); + if (RUI.FixedI != RUI.Fixed->end()) + RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start); + } + } PrevPos = Start; } @@ -129,9 +134,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); - // Check for first interference. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; + // Check for first interference from virtregs. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; if (!I.valid()) continue; SlotIndex StartI = I.start(); @@ -141,6 +146,19 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = StartI; } + // Same thing for fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::const_iterator I = RegUnits[i].FixedI; + LiveInterval::const_iterator E = RegUnits[i].Fixed->end(); + if (I == E) + continue; + SlotIndex StartI = I->start; + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + // Also check for register mask interference. RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); @@ -168,8 +186,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { } // Check for last interference in block. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; if (!I.valid() || I.start() >= Stop) continue; I.advanceTo(Stop); @@ -183,6 +201,23 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { ++I; } + // Fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::iterator &I = RegUnits[i].FixedI; + LiveInterval *LI = RegUnits[i].Fixed; + if (I == LI->end() || I->start >= Stop) + continue; + I = LI->advanceTo(I, Stop); + bool Backup = I == LI->end() || I->start >= Stop; + if (Backup) + --I; + SlotIndex StopI = I->end; + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + // Also check for register mask interference. SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start; for (unsigned i = RegMaskSlots.size(); diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 485a325..3c928a5 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// InterferenceCache remembers per-block interference from LiveIntervalUnions, +// fixed RegUnit interference, and register masks. // //===----------------------------------------------------------------------===// @@ -59,14 +60,31 @@ class InterferenceCache { /// PrevPos - The previous position the iterators were moved to. 
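The RegUnitInfo structure introduced below keys the cache off register units rather than per-alias LiveIntervalUnion lists; the walk itself is a one-liner with MCRegUnitIterator. A hedged sketch (countRegUnits is an invented helper):

#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Count the register units of PhysReg; as the header comment below notes,
// more than 4 is rare, which is why RegUnits is a SmallVector<..., 4>.
static unsigned countRegUnits(unsigned PhysReg, const TargetRegisterInfo *TRI) {
  unsigned N = 0;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
    ++N;
  return N;
}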
SlotIndex PrevPos; - /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of - /// PhysReg. - SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases; + /// RegUnitInfo - Information tracked about each RegUnit in PhysReg. + /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos) + /// had just been called. + struct RegUnitInfo { + /// Iterator pointing into the LiveIntervalUnion containing virtual + /// register interference. + LiveIntervalUnion::SegmentIter VirtI; - typedef LiveIntervalUnion::SegmentIter Iter; + /// Tag of the LIU last time we looked. + unsigned VirtTag; - /// Iters - an iterator for each alias - SmallVector<Iter, 8> Iters; + /// Fixed interference in RegUnit. + LiveInterval *Fixed; + + /// Iterator pointing into the fixed RegUnit interference. + LiveInterval::iterator FixedI; + + RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) { + VirtI.setMap(LIU.getMap()); + } + }; + + /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have + /// more than 4 RegUnits. + SmallVector<RegUnitInfo, 4> RegUnits; /// Blocks - Interference for each block in the function. SmallVector<BlockInterference, 8> Blocks; @@ -91,7 +109,7 @@ class InterferenceCache { bool hasRefs() const { return RefCount > 0; } - void revalidate(); + void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); /// valid - Return true if this is a valid entry for physReg. bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index a9ca42f..8d2282a 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Module.h" #include "llvm/Type.h" -#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; template <class ArgIt> diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index a1f479a..cac0c83 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/PassManager.h" +#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -78,40 +79,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } -/// Turn exception handling constructs into something the code generators can -/// handle. -static void addPassesToHandleExceptions(TargetMachine *TM, - PassManagerBase &PM) { - switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { - case ExceptionHandling::SjLj: - // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, - // catch info can get misplaced when a selector ends up more than one block - // removed from the parent invoke(s).
This could happen when a landing - // pad is shared by multiple invokes and is also a target of a normal - // edge from elsewhere. - PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); - // FALLTHROUGH - case ExceptionHandling::DwarfCFI: - case ExceptionHandling::ARM: - case ExceptionHandling::Win64: - PM.add(createDwarfEHPass(TM)); - break; - case ExceptionHandling::None: - PM.add(createLowerInvokePass(TM->getTargetLowering())); - - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - break; - } -} - /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, - bool DisableVerify) { + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { // Targets may override createPassConfig to provide a target-specific sublass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); + PassConfig->setStartStopPasses(StartAfter, StopAfter); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); @@ -120,7 +96,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->addIRPasses(); - addPassesToHandleExceptions(TM, PM); + PassConfig->addPassesToHandleExceptions(); PassConfig->addISelPrepare(); @@ -155,16 +131,30 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - bool DisableVerify) { + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, + StartAfter, StopAfter); if (!Context) return true; + if (StopAfter) { + // FIXME: The intent is that this should eventually write out a YAML file, + // containing the LLVM IR, the machine-level IR (when stopping after a + // machine-level pass), and whatever other information is needed to + // deserialize the code and resume compilation. For now, just write the + // LLVM IR. + PM.add(createPrintModulePass(&Out)); + return false; + } + if (hasMCSaveTempLabels()) Context->setAllowTemporaryLabels(false); const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCRegisterInfo &MRI = *getRegisterInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); OwningPtr<MCStreamer> AsmStreamer; @@ -180,7 +170,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCAsmBackend *MAB = 0; if (ShowMCEncoding) { const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context); + MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, + *Context); MAB = getTarget().createMCAsmBackend(getTargetTriple()); } @@ -198,8 +189,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
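The new StartAfter/StopAfter plumbing shown above lets a driver run only part of the codegen pipeline. A hedged sketch of a caller; the choice of pass ID and the error handling are illustrative, and per the FIXME above a StopAfter run currently just prints LLVM IR:

#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Run codegen but stop after one pass, dumping the partially compiled module.
static void emitUpTo(LLVMTargetMachine &TM, PassManagerBase &PM,
                     formatted_raw_ostream &Out) {
  AnalysisID StopAfter = &MachineSchedulerID; // any registered pass ID works
  if (TM.addPassesToEmitFile(PM, Out, TargetMachine::CGFT_AssemblyFile,
                             /*DisableVerify=*/true,
                             /*StartAfter=*/0, StopAfter))
    report_fatal_error("file emission not supported");
}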
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, - *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, + STI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); if (MCE == 0 || MAB == 0) return true; @@ -242,7 +233,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); if (!Context) return true; @@ -262,7 +253,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, raw_ostream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); if (!Ctx) return true; @@ -271,9 +262,10 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. + const MCRegisterInfo &MRI = *getRegisterInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, - *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, + STI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); if (MCE == 0 || MAB == 0) return true; diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index f1abcbb..6b6b9d0 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -16,8 +16,8 @@ #define DEBUG_TYPE "lexicalscopes" #include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 2187833..d631726 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -23,9 +23,9 @@ #include "LiveDebugVariables.h" #include "VirtRegMap.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Metadata.h" #include "llvm/Value.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -243,7 +243,7 @@ public: /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. - void computeIntervals(MachineRegisterInfo &MRI, + void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -618,6 +618,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -634,15 +635,32 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, unsigned LocNo = Defs[i].second; const MachineOperand &Loc = locations[LocNo]; + if (!Loc.isReg()) { + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + continue; + } + // Register locations are constrained to where the register value is live. 
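The LiveDebugVariables hunk below starts routing physical register locations through register units. As a quick sketch of the iterator API it uses (assuming a valid TargetRegisterInfo; PrintRegUnit is the printing helper that appears later in this patch):

#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Every physical register covers one or more register units, and aliasing
// registers share units, so one unit's live range can stand in for the
// whole alias group.
static void dumpRegUnits(unsigned PhysReg, const TargetRegisterInfo *TRI) {
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
    dbgs() << PrintRegUnit(*Units, TRI) << ' ';
  dbgs() << '\n';
}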
- if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { - LiveInterval *LI = &LIS.getInterval(Loc.getReg()); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + LiveInterval *LI = 0; + const VNInfo *VNI = 0; + if (LIS.hasInterval(Loc.getReg())) { + LI = &LIS.getInterval(Loc.getReg()); + VNI = LI->getVNInfoAt(Idx); + } SmallVector<SlotIndex, 16> Kills; extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); - addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); - } else - extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + if (LI) + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + continue; + } + + // For physregs, use the live range of the first regunit as a guide. + unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); + LiveInterval *LI = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LI->getVNInfoAt(Idx); + // Don't track copies from physregs, it is too expensive. + extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -656,7 +674,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, void LDVImpl::computeIntervals() { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); - userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); userValues[i]->mapVirtRegs(this); } } @@ -721,7 +739,8 @@ renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { if (TargetRegisterInfo::isVirtualRegister(NewReg)) mapVirtReg(NewReg, UV); - virtRegToEqClass.erase(OldReg); + if (OldReg != NewReg) + virtRegToEqClass.erase(OldReg); do { UV->renameRegister(OldReg, NewReg, SubIdx, TRI); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index ac18843..0a795e6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -48,6 +48,26 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { return I; } +VNInfo *LiveInterval::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + iterator I = find(Def); + if (I == end()) { + VNInfo *VNI = getNextValue(Def, VNInfoAllocator); + ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + if (SlotIndex::isSameInstr(Def, I->start)) { + assert(I->start == Def && "Cannot insert def, already live"); + assert(I->valno->def == Def && "Inconsistent existing value def"); + return I->valno; + } + assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); + VNInfo *VNI = getNextValue(Def, VNInfoAllocator); + ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + return VNI; +} + /// killedInRange - Return true if the interval has kills in [Start,End). bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { Ranges::const_iterator r = @@ -140,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { valnos.pop_back(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->setIsUnused(true); + ValNo->markUnused(); } } @@ -176,16 +196,16 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { // If NewEnd was in the middle of an interval, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // Erase any dead ranges. 
- ranges.erase(llvm::next(I), MergeTo); - // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. - Ranges::iterator Next = llvm::next(I); - if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { - I->end = Next->end; - ranges.erase(Next); + if (MergeTo != ranges.end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + I->end = MergeTo->end; + ++MergeTo; } + + // Erase any dead ranges. + ranges.erase(llvm::next(I), MergeTo); } @@ -353,18 +373,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { markValNoForDeletion(ValNo); } -/// findDefinedVNInfo - Find the VNInfo defined by the specified -/// index (register interval). -VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const { - for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); - i != e; ++i) { - if ((*i)->def == Idx) - return *i; - } - - return 0; -} - /// join - Join two live intervals (this, and other) together. This applies /// mappings to the value numbers in the LHS/RHS intervals as specified. If /// the intervals are not joinable, this aborts. @@ -373,6 +381,8 @@ void LiveInterval::join(LiveInterval &Other, const int *RHSValNoAssignments, SmallVector<VNInfo*, 16> &NewVNInfo, MachineRegisterInfo *MRI) { + verify(); + // Determine if any of our live range values are mapped. This is uncommon, so // we want to avoid the interval scan if not. bool MustMapCurValNos = false; @@ -440,16 +450,148 @@ void LiveInterval::join(LiveInterval &Other, valnos.resize(NumNewVals); // shrinkify // Okay, now insert the RHS live ranges into the LHS. - iterator InsertPos = begin(); unsigned RangeNo = 0; for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) { // Map the valno in the other live range to the current live range. I->valno = NewVNInfo[OtherAssignments[RangeNo]]; assert(I->valno && "Adding a dead range?"); - InsertPos = addRangeFrom(*I, InsertPos); + } + mergeIntervalRanges(Other); + + verify(); +} + +/// \brief Helper function for merging in another LiveInterval's ranges. +/// +/// This is a helper routine implementing an efficient merge of another +/// LiveInterval's ranges into the current interval. +/// +/// \param LHSValNo If non-NULL, set as the new value number for every range +/// from RHS which is merged into the LHS. +/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value +/// number matches this value number will be merged into LHS. +void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, + VNInfo *LHSValNo, + const VNInfo *RHSValNo) { + if (RHS.empty()) + return; + + // Ensure we're starting with a valid range. Note that we don't verify RHS + // because it may have had its value numbers adjusted in preparation for + // merging. + verify(); + + // The strategy for merging these efficiently is as follows: + // + // 1) Find the beginning of the impacted ranges in the LHS. + // 2) Create a new, merged sub-sequence of ranges merging from the position in + // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS + // entries being merged will be copied into this new range. + // 3) Replace the relevant section in LHS with these newly merged ranges. + // 4) Append any remaining ranges from RHS if LHS is exhausted in #2.
+ // + // We don't follow the typical in-place merge strategy for sorted ranges of + // appending the new ranges to the back and then using std::inplace_merge + // because one step of the merge can both mutate the original elements and + // remove elements from the original. Essentially, because the merge includes + // collapsing overlapping ranges, a more complex approach is required. + + // We do an initial binary search to optimize for a common pattern: a large + // LHS, and a very small RHS. + const_iterator RI = RHS.begin(), RE = RHS.end(); + iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI); + + // Merge into NewRanges until one of the ranges is exhausted. + SmallVector<LiveRange, 4> NewRanges; + + // Keep track of where to begin the replacement. + iterator ReplaceI = LI; + + // If there are preceding ranges in the LHS, put the last one into NewRanges + // so we can optionally extend it. Adjust the replacement point accordingly. + if (LI != begin()) { + ReplaceI = llvm::prior(LI); + NewRanges.push_back(*ReplaceI); + } + + // Now loop over the mergable portions of both LHS and RHS, merging into + // NewRanges. + while (LI != LE && RI != RE) { + // Skip incoming ranges with the wrong value. + if (RHSValNo && RI->valno != RHSValNo) { + ++RI; + continue; + } + + // Select the first range. We pick the earliest start point, and then the + // largest range. + LiveRange R = *LI; + if (*RI < R) { + R = *RI; + ++RI; + if (LHSValNo) + R.valno = LHSValNo; + } else { + ++LI; + } + + if (NewRanges.empty()) { + NewRanges.push_back(R); + continue; + } + + LiveRange &LastR = NewRanges.back(); + if (R.valno == LastR.valno) { + // Try to merge this range into the last one. + if (R.start <= LastR.end) { + LastR.end = std::max(LastR.end, R.end); + continue; + } + } else { + // We can't merge ranges across a value number. + assert(R.start >= LastR.end && + "Cannot overlap two LiveRanges with differing ValID's"); + } + + // If all else fails, just append the range. + NewRanges.push_back(R); + } + assert(RI == RE || LI == LE); + + // Check for being able to merge into the trailing sequence of ranges on the LHS. + if (!NewRanges.empty()) + for (; LI != LE && (LI->valno == NewRanges.back().valno && + LI->start <= NewRanges.back().end); + ++LI) + NewRanges.back().end = std::max(NewRanges.back().end, LI->end); + + // Replace the ranges in the LHS with the newly merged ones. It would be + // really nice if there were a move-supporting 'replace' directly in + // SmallVector, but as there is not, we pay the price of copies to avoid + // wasted memory allocations. + SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(), + NRE = NewRanges.end(); + for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI) + *ReplaceI = *NRI; + if (NRI == NRE) + ranges.erase(ReplaceI, LI); + else + ranges.insert(LI, NRI, NRE); + + // And finally insert any trailing end of RHS (if we have one). + for (; RI != RE; ++RI) { + LiveRange R = *RI; + if (LHSValNo) + R.valno = LHSValNo; + if (!ranges.empty() && + ranges.back().valno == R.valno && R.start <= ranges.back().end) + ranges.back().end = std::max(ranges.back().end, R.end); + else + ranges.push_back(R); } - ComputeJoinedWeight(Other); + // Ensure we finished with a valid new sequence of ranges. + verify(); } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -458,38 +600,20 @@ void LiveInterval::join(LiveInterval &Other, /// the overlapping LiveRanges have the specified value number. 
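The merge strategy described above is easier to see on plain data. A self-contained sketch of the core coalescing merge, using plain ints instead of SlotIndexes and ignoring value numbers (which the real code must respect):

#include <algorithm>
#include <vector>

struct Range { int start, end; }; // half-open [start, end)

// Merge the sorted ranges of RHS into the sorted ranges of LHS,
// coalescing ranges that touch or overlap.
static void mergeRanges(std::vector<Range> &LHS,
                        const std::vector<Range> &RHS) {
  std::vector<Range> Out;
  size_t i = 0, j = 0;
  while (i < LHS.size() || j < RHS.size()) {
    // Take whichever input range starts first.
    bool TakeLHS = j == RHS.size() ||
                   (i < LHS.size() && LHS[i].start <= RHS[j].start);
    Range Cur = TakeLHS ? LHS[i++] : RHS[j++];
    if (!Out.empty() && Cur.start <= Out.back().end)
      Out.back().end = std::max(Out.back().end, Cur.end); // coalesce
    else
      Out.push_back(Cur);
  }
  LHS.swap(Out);
}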
void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo) { - // TODO: Make this more efficient. - iterator InsertPos = begin(); - for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - // Map the valno in the other live range to the current live range. - LiveRange Tmp = *I; - Tmp.valno = LHSValNo; - InsertPos = addRangeFrom(Tmp, InsertPos); - } + mergeIntervalRanges(RHS, LHSValNo); } - /// MergeValueInAsValue - Merge all of the live ranges of a specific val# /// in RHS into this live interval as the specified value number. /// The LiveRanges in RHS are allowed to overlap with LiveRanges in the /// current interval; it will replace the value numbers of the overlapped /// live ranges with the specified value number. -void LiveInterval::MergeValueInAsValue( - const LiveInterval &RHS, - const VNInfo *RHSValNo, VNInfo *LHSValNo) { - // TODO: Make this more efficient. - iterator InsertPos = begin(); - for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - if (I->valno != RHSValNo) - continue; - // Map the valno in the other live range to the current live range. - LiveRange Tmp = *I; - Tmp.valno = LHSValNo; - InsertPos = addRangeFrom(Tmp, InsertPos); - } +void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { + mergeIntervalRanges(RHS, LHSValNo, RHSValNo); } - /// MergeValueNumberInto - This method is called when two value numbers /// are found to be equivalent. This eliminates V1, replacing all /// LiveRanges with the V1 value number with the V2 value number. This can @@ -543,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } - // Merge the relevant flags. - V2->mergeFlags(V1); - // Now that V1 is dead, remove it. markValNoForDeletion(V1); @@ -569,6 +690,8 @@ void LiveInterval::Copy(const LiveInterval &RHS, const LiveRange &LR = RHS.ranges[i]; addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); } + + verify(); } unsigned LiveInterval::getSize() const { @@ -578,29 +701,6 @@ unsigned LiveInterval::getSize() const { return Sum; } -/// ComputeJoinedWeight - Set the weight of a live interval Joined -/// after Other has been merged into it. -void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) { - // If either of these intervals was spilled, the weight is the - // weight of the non-spilled interval. This can only happen with - // iterative coalescers.
- - if (Other.weight != HUGE_VALF) { - weight += Other.weight; - } - else if (weight == HUGE_VALF && - !TargetRegisterInfo::isPhysicalRegister(reg)) { - // Remove this assert if you have an iterative coalescer - assert(0 && "Joining to spilled interval"); - weight = Other.weight; - } - else { - // Otherwise the weight stays the same - // Remove this assert if you have an iterative coalescer - assert(0 && "Joining from spilled interval"); - } -} - raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } @@ -609,15 +709,10 @@ void LiveRange::dump() const { dbgs() << *this << "\n"; } -void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { - OS << PrintReg(reg, TRI); - if (weight != 0) - OS << ',' << weight; - +void LiveInterval::print(raw_ostream &OS) const { if (empty()) - OS << " EMPTY"; + OS << "EMPTY"; else { - OS << " = "; for (LiveInterval::Ranges::const_iterator I = ranges.begin(), E = ranges.end(); I != E; ++I) { OS << *I; @@ -639,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { } else { OS << vni->def; if (vni->isPHIDef()) - OS << "-phidef"; - if (vni->hasPHIKill()) - OS << "-phikill"; + OS << "-phi"; } } } @@ -651,6 +744,23 @@ void LiveInterval::dump() const { dbgs() << *this << "\n"; } +#ifndef NDEBUG +void LiveInterval::verify() const { + for (const_iterator I = begin(), E = end(); I != E; ++I) { + assert(I->start.isValid()); + assert(I->end.isValid()); + assert(I->start < I->end); + assert(I->valno != 0); + assert(I->valno == valnos[I->valno->id]); + if (llvm::next(I) != E) { + assert(I->end <= llvm::next(I)->start); + if (I->end == llvm::next(I)->start) + assert(I->valno != llvm::next(I)->valno); + } + } +} +#endif + void LiveRange::print(raw_ostream &os) const { os << *this; @@ -712,13 +822,13 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - if (MO.isUse() && MO.isUndef()) - continue; // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = Idx.getRegSlot(MO.isUse()); - const VNInfo *VNI = LI.getVNInfoAt(Idx); - assert(VNI && "Interval not live at use."); + LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. + if (!VNI) + continue; MO.setReg(LIV[getEqClass(VNI)]->reg); } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 934cc12..d0f8ae1 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -31,20 +32,20 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "LiveRangeCalc.h" #include <algorithm> #include <limits> #include <cmath> using namespace llvm; -// Hidden options for help debugging. 
-static cl::opt<bool> DisableReMat("disable-rematerialization", - cl::init(false), cl::Hidden); - -STATISTIC(numIntervals , "Number of original intervals"); +// Switch to the new experimental algorithm for computing live intervals. +static cl::opt<bool> +NewLiveIntervals("new-live-intervals", cl::Hidden, + cl::desc("Use new algorithm for computing live intervals")); char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -61,23 +62,35 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LiveVariables>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); + AU.addRequiredTransitiveID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); AU.addPreserved<SlotIndexes>(); AU.addRequiredTransitive<SlotIndexes>(); MachineFunctionPass::getAnalysisUsage(AU); } +LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), + DomTree(0), LRCalc(0) { + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); +} + +LiveIntervals::~LiveIntervals() { + delete LRCalc; +} + void LiveIntervals::releaseMemory() { // Free the live intervals themselves. - for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(), - E = r2iMap_.end(); I != E; ++I) - delete I->second; - - r2iMap_.clear(); + for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i) + delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)]; + VirtRegIntervals.clear(); RegMaskSlots.clear(); RegMaskBits.clear(); RegMaskBlocks.clear(); + for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) + delete RegUnitIntervals[i]; + RegUnitIntervals.clear(); + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. VNInfoAllocator.Reset(); } @@ -85,20 +98,34 @@ void LiveIntervals::releaseMemory() { /// runOnMachineFunction - Register allocate the whole function /// bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &mf_->getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - aa_ = &getAnalysis<AliasAnalysis>(); - lv_ = &getAnalysis<LiveVariables>(); - indexes_ = &getAnalysis<SlotIndexes>(); - allocatableRegs_ = tri_->getAllocatableSet(fn); - reservedRegs_ = tri_->getReservedRegs(fn); - - computeIntervals(); - - numIntervals += getNumIntervals(); + MF = &fn; + MRI = &MF->getRegInfo(); + TM = &fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + AA = &getAnalysis<AliasAnalysis>(); + LV = &getAnalysis<LiveVariables>(); + Indexes = &getAnalysis<SlotIndexes>(); + DomTree = &getAnalysis<MachineDominatorTree>(); + if (!LRCalc) + LRCalc = new LiveRangeCalc(); + AllocatableRegs = TRI->getAllocatableSet(fn); + ReservedRegs = TRI->getReservedRegs(fn); + + // Allocate space for all virtual registers. + VirtRegIntervals.resize(MRI->getNumVirtRegs()); + + if (NewLiveIntervals) { + // This is the new way of computing live intervals. + // It is independent of LiveVariables, and it can run at any time. + computeVirtRegs(); + computeRegMasks(); + } else { + // This is the old way of computing live intervals. + // It depends on LiveVariables.
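Because the cl::opt above registers a hidden flag named new-live-intervals, the experimental path can be selected from a codegen tool's command line, for example (invocation sketch; the exact tool and options depend on the build):

  llc -new-live-intervals foo.ll -o foo.s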
+ computeIntervals(); + } + computeLiveInRegUnits(); DEBUG(dump()); return true; } @@ -108,27 +135,24 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; - // Dump the physregs. - for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg) - if (const LiveInterval *LI = r2iMap_.lookup(Reg)) { - LI->print(OS, tri_); - OS << '\n'; - } + // Dump the regunits. + for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) + if (LiveInterval *LI = RegUnitIntervals[i]) + OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n'; // Dump the virtregs. - for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg) - if (const LiveInterval *LI = - r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) { - LI->print(OS, tri_); - OS << '\n'; - } + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (hasInterval(Reg)) + OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; + } printInstrs(OS); } void LiveIntervals::printInstrs(raw_ostream &OS) const { OS << "********** MACHINEINSTRS **********\n"; - mf_->print(OS, indexes_); + MF->print(OS, Indexes); } void LiveIntervals::dumpInstrs() const { @@ -176,13 +200,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineOperand& MO, unsigned MOIdx, LiveInterval &interval) { - DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); + DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI)); // Virtual registers may be defined multiple times (due to phi // elimination and 2-addr elimination). Much of what we do only has to be // done once for the vreg. We use an empty interval to detect the first // time we see a vreg. - LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); @@ -226,22 +250,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << " +" << NewLR); interval.addRange(NewLR); - bool PHIJoin = lv_->isPHIJoin(interval.reg); + bool PHIJoin = LV->isPHIJoin(interval.reg); if (PHIJoin) { - // A phi join register is killed at the end of the MBB and revived as a new - // valno in the killing blocks. + // A phi join register is killed at the end of the MBB and revived as a + // new valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely // live in, adding [instrIndex(begin), instrIndex(end)+4) to the // live interval.
for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); + MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I); + LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), + ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR); } @@ -260,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); ValNo = interval.getNextValue(Start, VNInfoAllocator); - ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); @@ -319,11 +342,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); - DEBUG({ - dbgs() << " RESULT: "; - interval.print(dbgs(), tri_); - }); - } else if (lv_->isPHIJoin(interval.reg)) { + DEBUG(dbgs() << " RESULT: " << interval); + } else if (LV->isPHIJoin(interval.reg)) { // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. @@ -337,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { llvm_unreachable("Multiply defined register"); @@ -347,101 +366,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << '\n'); } -static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) { - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) { - const MachineBasicBlock* succ = *SI; - if (succ->isLiveIn(Reg)) - return true; - } - return false; -} - -void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, - MachineOperand& MO, - LiveInterval &interval) { - DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); - - SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); - SlotIndex end = start; - - // If it is not used after definition, it is considered dead at - // the instruction defining it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - // For earlyclobbers, the defSlot was pushed back one; the extra - // advance below compensates. - if (MO.isDead()) { - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - goto exit; - } - - // If it is not dead on definition, it must be killed by a - // subsequent instruction. Hence its interval is: - // [defSlot(def), useSlot(kill)+1) - baseIndex = baseIndex.getNextIndex(); - while (++mi != MBB->end()) { - - if (mi->isDebugValue()) - continue; - if (getInstructionFromIndex(baseIndex) == 0) - baseIndex = indexes_->getNextNonNullIndex(baseIndex); - - if (mi->killsRegister(interval.reg, tri_)) { - DEBUG(dbgs() << " killed"); - end = baseIndex.getRegSlot(); - goto exit; - } else { - int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); - if (DefIdx != -1) { - if (mi->isRegTiedToUseOperand(DefIdx)) { - // Two-address instruction. 
- end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber()); - } else { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that - // defines it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - } - goto exit; - } - } - - baseIndex = baseIndex.getNextIndex(); - } - - // If we get here the register *should* be live out. - assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!"); - - // FIXME: We need saner rules for reserved regs. - if (isReserved(interval.reg)) { - end = start.getDeadSlot(); - } else { - // Unreserved, unallocable registers like EFLAGS can be live across basic - // block boundaries. - assert(isRegLiveIntoSuccessor(MBB, interval.reg) && - "Unreserved reg not live-out?"); - end = getMBBEndIdx(MBB); - } -exit: - assert(start < end && "did not find end of interval?"); - - // Already exists? Extend old live interval. - VNInfo *ValNo = interval.getVNInfoAt(start); - bool Extend = ValNo != 0; - if (!Extend) - ValNo = interval.getNextValue(start, VNInfoAllocator); - LiveRange LR(start, end, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << '\n'); -} - void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, SlotIndex MIIdx, @@ -450,93 +374,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(MO.getReg())); -} - -void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, - SlotIndex MIIdx, - LiveInterval &interval) { - assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) && - "Only physical registers can be live in."); - assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() || - MBB->isLandingPad()) && - "Allocatable live-ins only valid for entry blocks and landing pads."); - - DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); - - // Look for kills, if it reaches a def before it's killed, then it shouldn't - // be considered a livein. - MachineBasicBlock::iterator mi = MBB->begin(); - MachineBasicBlock::iterator E = MBB->end(); - // Skip over DBG_VALUE at the start of the MBB. - if (mi != E && mi->isDebugValue()) { - while (++mi != E && mi->isDebugValue()) - ; - if (mi == E) - // MBB is empty except for DBG_VALUE's. - return; - } - - SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex; - if (getInstructionFromIndex(baseIndex) == 0) - baseIndex = indexes_->getNextNonNullIndex(baseIndex); - - SlotIndex end = baseIndex; - bool SeenDefUse = false; - - while (mi != E) { - if (mi->killsRegister(interval.reg, tri_)) { - DEBUG(dbgs() << " killed"); - end = baseIndex.getRegSlot(); - SeenDefUse = true; - break; - } else if (mi->modifiesRegister(interval.reg, tri_)) { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that defines - // it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - SeenDefUse = true; - break; - } - - while (++mi != E && mi->isDebugValue()) - // Skip over DBG_VALUE. - ; - if (mi != E) - baseIndex = indexes_->getNextNonNullIndex(baseIndex); - } - - // Live-in register might not be used at all. 
- if (!SeenDefUse) { - if (isAllocatable(interval.reg) || - !isRegLiveIntoSuccessor(MBB, interval.reg)) { - // Allocatable registers are never live through. - // Non-allocatable registers that aren't live into any successors also - // aren't live through. - DEBUG(dbgs() << " dead"); - return; - } else { - // If we get here the register is non-allocatable and live into some - // successor. We'll conservatively assume it's live-through. - DEBUG(dbgs() << " live through"); - end = getMBBEndIdx(MBB); - } - } - - SlotIndex defIdx = getMBBStartIdx(MBB); - assert(getInstructionFromIndex(defIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator); - vni->setIsPHIDef(true); - LiveRange LR(start, end, vni); - - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << '\n'); } /// computeIntervals - computes the live intervals for virtual @@ -546,12 +383,12 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'); + << ((Value*)MF->getFunction())->getName() << '\n'); - RegMaskBlocks.resize(mf_->getNumBlockIDs()); + RegMaskBlocks.resize(MF->getNumBlockIDs()); SmallVector<unsigned, 8> UndefUses; - for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); @@ -564,22 +401,16 @@ void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "BB#" << MBB->getNumber() << ":\t\t# derived from " << MBB->getName() << "\n"); - // Create intervals for live-ins to this BB first. - for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), - LE = MBB->livein_end(); LI != LE; ++LI) { - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - } - // Skip over empty initial indices. if (getInstructionFromIndex(MIIndex) == 0) - MIIndex = indexes_->getNextNonNullIndex(MIIndex); + MIIndex = Indexes->getNextNonNullIndex(MIIndex); for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); if (MI->isDebugValue()) continue; - assert(indexes_->getInstructionFromIndex(MIIndex) == MI && + assert(Indexes->getInstructionFromIndex(MIIndex) == MI && "Lost SlotIndex synchronization"); // Handle defs. @@ -593,7 +424,7 @@ void LiveIntervals::computeIntervals() { continue; } - if (!MO.isReg() || !MO.getReg()) + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // handle register defs - build intervals @@ -604,7 +435,7 @@ void LiveIntervals::computeIntervals() { } // Move to the next instr slot. - MIIndex = indexes_->getNextNonNullIndex(MIIndex); + MIIndex = Indexes->getNextNonNullIndex(MIIndex); } // Compute the number of register mask instructions in this block. @@ -626,14 +457,147 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { return new LiveInterval(reg, Weight); } -/// dupInterval - Duplicate a live interval. The caller is responsible for -/// managing the allocated memory. -LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { - LiveInterval *NewLI = createInterval(li->reg); - NewLI->Copy(*li, mri_, getVNInfoAllocator()); - return NewLI; + +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses. 
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI->empty() && "Should only compute empty intervals."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->createDeadDefs(LI); + LRCalc->extendToUses(LI); +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = createInterval(Reg); + VirtRegIntervals[Reg] = LI; + computeVirtRegInterval(LI); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.first = RegMaskSlots.size(); + for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); + MI != ME; ++MI) + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); + RegMaskBits.push_back(MO->getRegMask()); + } + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first; + } } +//===----------------------------------------------------------------------===// +// Register Unit Liveness +//===----------------------------------------------------------------------===// +// +// Fixed interference typically comes from ABI boundaries: Function arguments +// and return values are passed in fixed registers, and so are exception +// pointers entering landing pads. Certain instructions require values to be +// present in specific registers. That is also represented through fixed +// interference. +// + +/// computeRegUnitInterval - Compute the live interval of a register unit, based +/// on the uses and defs of aliasing registers. The interval should be empty, +/// or contain only dead phi-defs from ABI blocks. +void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { + unsigned Unit = LI->reg; + + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + + // The physregs aliasing Unit are the roots and their super-registers. + // Create all values as dead defs before extending to uses. Note that roots + // may share super-registers. That's OK because createDeadDefs() is + // idempotent. It is very rare for a register unit to have multiple roots, so + // uniquing super-registers is probably not worthwhile. + for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + unsigned Root = *Roots; + if (!MRI->reg_empty(Root)) + LRCalc->createDeadDefs(LI, Root); + for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + if (!MRI->reg_empty(*Supers)) + LRCalc->createDeadDefs(LI, *Supers); + } + } + + // Now extend LI to reach all uses. + // Ignore uses of reserved registers. We only track defs of those.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + unsigned Root = *Roots; + if (!isReserved(Root) && !MRI->reg_empty(Root)) + LRCalc->extendToUses(LI, Root); + for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + unsigned Reg = *Supers; + if (!isReserved(Reg) && !MRI->reg_empty(Reg)) + LRCalc->extendToUses(LI, Reg); + } + } +} + + +/// computeLiveInRegUnits - Precompute the live ranges of any register units +/// that are live-in to an ABI block somewhere. Register values can appear +/// without a corresponding def when entering the entry block or a landing pad. +/// +void LiveIntervals::computeLiveInRegUnits() { + RegUnitIntervals.resize(TRI->getNumRegUnits()); + DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); + + // Keep track of the intervals allocated. + SmallVector<LiveInterval*, 8> NewIntvs; + + // Check all basic blocks for live-ins. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock *MBB = MFI; + + // We only care about ABI blocks: Entry + landing pads. + if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty()) + continue; + + // Create phi-defs at Begin for all live-in registers. + SlotIndex Begin = Indexes->getMBBStartIdx(MBB); + DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); + for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(), + LIE = MBB->livein_end(); LII != LIE; ++LII) { + for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { + unsigned Unit = *Units; + LiveInterval *Intv = RegUnitIntervals[Unit]; + if (!Intv) { + Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); + NewIntvs.push_back(Intv); + } + VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + (void)VNI; + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); + } + } + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + + // Compute the 'normal' part of the intervals. + for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) + computeRegUnitInterval(NewIntvs[i]); +} + + /// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. @@ -649,14 +613,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallPtrSet<MachineBasicBlock*, 16> LiveOut; // Visit all instructions reading li->reg. - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg); + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg); MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - // Note: This intentionally picks up the wrong VNI in case of an EC redef. - // See below. - VNInfo *VNI = li->getVNInfoBefore(Idx); + LiveRangeQuery LRQ(*li, Idx); + VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting <undef> flags @@ -667,13 +630,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; } // Special case: An early-clobber tied operand reads and writes the - // register one slot early. The getVNInfoBefore call above would have - // picked up the value defined by UseMI. Adjust the kill slot and value. 
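The shrinkToUses change above replaces getVNInfoBefore with a LiveRangeQuery. A sketch of that pattern (a hypothetical helper; LiveRangeQuery, valueIn and valueDefined are the calls used in the hunk):

#include "llvm/CodeGen/LiveInterval.h"
using namespace llvm;

// Return the value a use at Idx actually reads. valueIn() is the value
// live on entry to the instruction; valueDefined() is the value the
// instruction itself defines, which matters for tied early-clobber
// redefs that read one slot early.
static VNInfo *valueReadAt(const LiveInterval &LI, SlotIndex &Idx) {
  LiveRangeQuery LRQ(LI, Idx);
  if (VNInfo *DefVNI = LRQ.valueDefined())
    Idx = DefVNI->def;
  return LRQ.valueIn();
}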
- if (SlotIndex::isSameInstr(VNI->def, Idx)) { - Idx = VNI->def; - VNI = li->getVNInfoBefore(Idx); - assert(VNI && "Early-clobber tied value not available"); - } + // register one slot early. + if (VNInfo *DefVNI = LRQ.valueDefined()) + Idx = DefVNI->def; + WorkList.push_back(std::make_pair(Idx, VNI)); } @@ -747,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - VNI->setIsUnused(true); + VNI->markUnused(); NewLI.removeRange(*LII); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; @@ -755,7 +715,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(li->reg, tri_); + MI->addRegisterDead(li->reg, TRI); if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); dead->push_back(MI); @@ -775,13 +735,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // void LiveIntervals::addKillFlags() { - for (iterator I = begin(), E = end(); I != E; ++I) { - unsigned Reg = I->first; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) continue; - if (mri_->reg_nodbg_empty(Reg)) - continue; - LiveInterval *LI = I->second; + LiveInterval *LI = &getInterval(Reg); // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; @@ -797,101 +755,6 @@ void LiveIntervals::addKillFlags() { } } -/// getReMatImplicitUse - If the remat definition MI has one (for now, we only -/// allow one) virtual register operand, then its uses are implicitly using -/// the register. Returns the virtual register. -unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, - MachineInstr *MI) const { - unsigned RegOp = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0 || Reg == li.reg) - continue; - - if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg)) - continue; - RegOp = MO.getReg(); - break; // Found vreg operand - leave the loop. - } - return RegOp; -} - -/// isValNoAvailableAt - Return true if the val# of the specified interval -/// which reaches the given instruction also reaches the specified use index. -bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, - SlotIndex UseIdx) const { - VNInfo *UValNo = li.getVNInfoAt(UseIdx); - return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI)); -} - -/// isReMaterializable - Returns true if the definition MI of the specified -/// val# of the specified interval is re-materializable. -bool -LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI, - const SmallVectorImpl<LiveInterval*> *SpillIs, - bool &isLoad) { - if (DisableReMat) - return false; - - if (!tii_->isTriviallyReMaterializable(MI, aa_)) - return false; - - // Target-specific code can mark an instruction as being rematerializable - // if it has one virtual reg use, though it had better be something like - // a PIC base register which is likely to be live everywhere. 
- unsigned ImpUse = getReMatImplicitUse(li, MI); - if (ImpUse) { - const LiveInterval &ImpLi = getInterval(ImpUse); - for (MachineRegisterInfo::use_nodbg_iterator - ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end(); - ri != re; ++ri) { - MachineInstr *UseMI = &*ri; - SlotIndex UseIdx = getInstructionIndex(UseMI); - if (li.getVNInfoAt(UseIdx) != ValNo) - continue; - if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) - return false; - } - - // If a register operand of the re-materialized instruction is going to - // be spilled next, then it's not legal to re-materialize this instruction. - if (SpillIs) - for (unsigned i = 0, e = SpillIs->size(); i != e; ++i) - if (ImpUse == (*SpillIs)[i]->reg) - return false; - } - return true; -} - -/// isReMaterializable - Returns true if every definition of MI of every -/// val# of the specified interval is re-materializable. -bool -LiveIntervals::isReMaterializable(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> *SpillIs, - bool &isLoad) { - isLoad = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - if (!ReMatDefMI) - return false; - bool DefIsLoad = false; - if (!ReMatDefMI || - !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad)) - return false; - isLoad |= DefIsLoad; - } - return true; -} - MachineBasicBlock* LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { // A local live range must be fully contained inside the block, meaning it is @@ -911,11 +774,30 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { // getMBBFromIndex doesn't need to search the MBB table when both indexes // belong to proper instructions. - MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start); - MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop); + MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start); + MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop); return MBB1 == MBB2 ? MBB1 : NULL; } +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *PHI = *I; + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -940,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); @@ -990,7 +871,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, if (!Found) { // This is the first overlap. Initialize UsableRegs to all ones. 
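checkRegMaskInterference, continued below, works by intersecting a candidate set with every clobber mask. A self-contained sketch of that BitVector pattern (a regmask has one bit per register, and a set bit means the register is preserved; clearBitsNotInMask drops every register a mask does not preserve):

#include <stdint.h>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
using namespace llvm;

// Start from "every register is usable" and intersect with each mask;
// whatever survives is preserved by all of them.
static BitVector usableAcrossMasks(unsigned NumRegs,
                                   ArrayRef<const uint32_t *> Masks) {
  BitVector Usable(NumRegs, true);
  for (unsigned i = 0, e = Masks.size(); i != e; ++i)
    Usable.clearBitsNotInMask(Masks[i]);
  return Usable;
}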
UsableRegs.clear(); - UsableRegs.resize(tri_->getNumRegs(), true); + UsableRegs.resize(TRI->getNumRegs(), true); Found = true; } // Remove usable registers clobbered by this mask. @@ -1101,6 +982,9 @@ public: BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); + Entering.clear(); + Internal.clear(); + Exiting.clear(); collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); @@ -1176,78 +1060,44 @@ private: // TODO: Currently we're skipping uses that are reserved or have no // interval, but we're not updating their kills. This should be // fixed. - if (!LIS.hasInterval(Reg) || - (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)) continue; - LiveInterval* LI = &LIS.getInterval(Reg); - - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - if (MO.isEarlyClobber()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true)); - assert(LR != 0 && "No EC range?"); - if (LR->end > OldIdx.getDeadSlot()) - Exiting.insert(std::make_pair(LI, LR)); - else - Internal.insert(std::make_pair(LI, LR)); - } else if (MO.isDead()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); - assert(LR != 0 && "No dead-def range?"); - Internal.insert(std::make_pair(LI, LR)); - } else { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot()); - assert(LR && LR->end > OldIdx.getDeadSlot() && - "Non-dead-def should have live range exiting."); - Exiting.insert(std::make_pair(LI, LR)); - } + // Collect ranges for register units. These live ranges are computed on + // demand, so just skip any that haven't been computed yet. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) + collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx); + } else { + // Collect ranges for individual virtual registers. + collectRanges(MO, &LIS.getInterval(Reg), + Entering, Internal, Exiting, OldIdx); } } } - // Collect IntRangePairs for all operands of MI that may need fixing. - void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering, - RangeSet& Exiting, SlotIndex MIStartIdx, - SlotIndex MIEndIdx) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& MO = *MOI; - assert(!MO.isRegMask() && "Can't have RegMasks in bundles."); - if (!MO.isReg() || MO.getReg() == 0) - continue; - - unsigned Reg = MO.getReg(); - - // TODO: Currently we're skipping uses that are reserved or have no - // interval, but we're not updating their kills. This should be - // fixed. 
- if (!LIS.hasInterval(Reg) || - (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) - continue; - - LiveInterval* LI = &LIS.getInterval(Reg); - - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles."); - assert(!MO.isDead() && "Dead-defs not allowed in bundles."); - LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot()); - assert(LR != 0 && "Internal ranges not allowed in bundles."); + void collectRanges(const MachineOperand &MO, LiveInterval *LI, + RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting, + SlotIndex OldIdx) { + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); + assert(LR != 0 && "No live range for def?"); + if (LR->end > OldIdx.getDeadSlot()) Exiting.insert(std::make_pair(LI, LR)); - } + else + Internal.insert(std::make_pair(LI, LR)); } } - BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) { + BundleRanges createBundleRanges(RangeSet& Entering, + RangeSet& Internal, + RangeSet& Exiting) { BundleRanges BR; for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); @@ -1284,7 +1134,8 @@ private: return; // Bail out if we don't have kill flags on the old register. MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); - assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill."); + assert(!NewKillMI->killsRegister(reg) && + "New kill instr is already a kill."); OldKillMI->clearRegisterKills(reg, &TRI); NewKillMI->addRegisterKilled(reg, &TRI); } @@ -1523,22 +1374,23 @@ private: }; void LiveIntervals::handleMove(MachineInstr* MI) { - SlotIndex OldIndex = indexes_->getInstructionIndex(MI); - indexes_->removeMachineInstrFromMaps(MI); + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); + Indexes->removeMachineInstrFromMaps(MI); SlotIndex NewIndex = MI->isInsideBundle() ? 
- indexes_->getInstructionIndex(MI) : - indexes_->insertMachineInstrInMaps(MI); + Indexes->getInstructionIndex(MI) : + Indexes->insertMachineInstrInMaps(MI); assert(getMBBStartIdx(MI->getParent()) <= OldIndex && OldIndex < getMBBEndIdx(MI->getParent()) && "Cannot handle moves across basic block boundaries."); assert(!MI->isBundled() && "Can't handle bundled instructions yet."); - HMEditor HME(*this, *mri_, *tri_, NewIndex); + HMEditor HME(*this, *MRI, *TRI, NewIndex); HME.moveAllRangesFrom(MI, OldIndex); } -void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) { - SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart); - HMEditor HME(*this, *mri_, *tri_, NewIndex); +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, + MachineInstr* BundleStart) { + SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); + HMEditor HME(*this, *MRI, *TRI, NewIndex); HME.moveAllRangesInto(MI, BundleStart); } diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 60a6880..dadd02b 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -81,7 +81,6 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) { void LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { - OS << "LIU " << PrintReg(RepReg, TRI); if (empty()) { OS << " empty\n"; return; @@ -209,3 +208,26 @@ bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) { VRI = VirtReg->advanceTo(VRI, Overlaps.start()); } } + +void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, + unsigned NSize) { + // Reuse existing allocation. + if (NSize == Size) + return; + clear(); + Size = NSize; + LIUs = static_cast<LiveIntervalUnion*>( + malloc(sizeof(LiveIntervalUnion)*NSize)); + for (unsigned i = 0; i != Size; ++i) + new(LIUs + i) LiveIntervalUnion(Alloc); +} + +void LiveIntervalUnion::Array::clear() { + if (!LIUs) + return; + for (unsigned i = 0; i != Size; ++i) + LIUs[i].~LiveIntervalUnion(); + free(LIUs); + Size = 0; + LIUs = 0; +} diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index dbf5ac1..cd4e690 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -60,13 +60,11 @@ public: class Query; private: - const unsigned RepReg; // representative register number unsigned Tag; // unique tag for current contents. LiveSegments Segments; // union of virtual reg segments public: - LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a) - {} + explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {} // Iterate over all segments in the union of live virtual registers ordered // by their starting position. @@ -183,6 +181,28 @@ public: Query(const Query&); // DO NOT IMPLEMENT void operator=(const Query&); // DO NOT IMPLEMENT }; + + // Array of LiveIntervalUnions. + class Array { + unsigned Size; + LiveIntervalUnion *LIUs; + public: + Array() : Size(0), LIUs(0) {} + ~Array() { clear(); } + + // Initialize the array to have Size entries. + // Reuse an existing allocation if the size matches. 
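LiveIntervalUnion now has no default constructor (every union needs the allocator), which is why the Array implementation shown a few hunks above pairs malloc with placement new. A generic sketch of that idiom (hypothetical helpers, error handling omitted):

#include <cstdlib>
#include <new>

// Construct N objects of type T in raw storage, passing each constructor
// the same argument.
template <class T, class Arg>
T *constructArray(unsigned N, Arg &A) {
  T *P = static_cast<T *>(std::malloc(sizeof(T) * N)); // assumes success
  for (unsigned i = 0; i != N; ++i)
    new (P + i) T(A); // placement new into raw storage
  return P;
}

// Matching teardown: destructors must be invoked manually before free().
template <class T>
void destroyArray(T *P, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    P[i].~T();
  std::free(P);
}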
+ void init(LiveIntervalUnion::Allocator&, unsigned Size); + + unsigned size() const { return Size; } + + void clear(); + + LiveIntervalUnion& operator[](unsigned idx) { + assert(idx < Size && "idx out of bounds"); + return LIUs[idx]; + } + }; }; } // end namespace llvm diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d8ab791..d828f25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -14,10 +14,19 @@ #define DEBUG_TYPE "regalloc" #include "LiveRangeCalc.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; -void LiveRangeCalc::reset(const MachineFunction *MF) { +void LiveRangeCalc::reset(const MachineFunction *MF, + SlotIndexes *SI, + MachineDominatorTree *MDT, + VNInfo::Allocator *VNIA) { + MRI = &MF->getRegInfo(); + Indexes = SI; + DomTree = MDT; + Alloc = VNIA; + unsigned N = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(N); @@ -26,8 +35,72 @@ void LiveRangeCalc::reset(const MachineFunction *MF) { } +void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all def operands. If the same instruction has multiple defs of Reg, + // LI->createDeadDef() will deduplicate. + for (MachineRegisterInfo::def_iterator + I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { + const MachineInstr *MI = &*I; + // Find the corresponding slot index. + SlotIndex Idx; + if (MI->isPHI()) + // PHI defs begin at the basic block start index. + Idx = Indexes->getMBBStartIdx(MI->getParent()); + else + // Instructions are either normal 'r', or early clobber 'e'. + Idx = Indexes->getInstructionIndex(MI) + .getRegSlot(I.getOperand().isEarlyClobber()); + + // Create the def in LI. This may find an existing def. + LI->createDeadDef(Idx, *Alloc); + } +} + + +void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all operands that read Reg. This may include partial defs. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), + E = MRI->reg_nodbg_end(); I != E; ++I) { + const MachineOperand &MO = I.getOperand(); + if (!MO.readsReg()) + continue; + // MI is reading Reg. We may have visited MI before if it happens to be + // reading Reg multiple times. That is OK, extend() is idempotent. + const MachineInstr *MI = &*I; + + // Find the SlotIndex being read. + SlotIndex Idx; + if (MI->isPHI()) { + assert(!MO.isDef() && "Cannot handle PHI def of partial register."); + // PHI operands are paired: (Reg, PredMBB). + // Extend the live range to be live-out from PredMBB. + Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB()); + } else { + // This is a normal instruction. + Idx = Indexes->getInstructionIndex(MI).getRegSlot(); + // Check for early-clobber redefs. + unsigned DefIdx; + if (MO.isDef()) { + if (MO.isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) { + // FIXME: This would be a lot easier if tied early-clobber uses also + // had an early-clobber flag. + if (MI->getOperand(DefIdx).isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + } + extend(LI, Idx, Reg); + } +} + + // Transfer information from the LiveIn vector to the live ranges. 
-void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) { +void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) { for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), E = LiveIn.end(); I != E; ++I) { if (!I->DomNode) @@ -56,9 +129,7 @@ void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) { void LiveRangeCalc::extend(LiveInterval *LI, SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc) { + unsigned PhysReg) { assert(LI && "Missing live range"); assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); @@ -75,34 +146,31 @@ void LiveRangeCalc::extend(LiveInterval *LI, // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree); + VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg); // When there were multiple different values, we may need new PHIs. if (!VNI) - updateSSA(Indexes, DomTree, Alloc); + updateSSA(); - updateLiveIns(VNI, Indexes); + updateLiveIns(VNI); } // This function is called by a client after using the low-level API to add // live-out and live-in blocks. The unique value optimization is not // available, SplitEditor::transferValues handles that case directly anyway. -void LiveRangeCalc::calculateValues(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc) { +void LiveRangeCalc::calculateValues() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); - updateSSA(Indexes, DomTree, Alloc); - updateLiveIns(0, Indexes); + updateSSA(); + updateLiveIns(0); } VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, MachineBasicBlock *KillMBB, SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree) { + unsigned PhysReg) { // Blocks where LI should be live-in. SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); @@ -113,7 +181,22 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { MachineBasicBlock *MBB = WorkList[i]; - assert(!MBB->pred_empty() && "Value live-in to entry block?"); + +#ifndef NDEBUG + if (MBB->pred_empty()) { + MBB->getParent()->verify(); + llvm_unreachable("Use not jointly dominated by defs."); + } + + if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && + !MBB->isLiveIn(PhysReg)) { + MBB->getParent()->verify(); + errs() << "The register needs to be live in to BB#" << MBB->getNumber() + << ", but is missing from the live-in list.\n"; + llvm_unreachable("Invalid global physical register"); + } +#endif + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; @@ -168,9 +251,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. 
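// Blocks whose live-in value is not a single dominating def receive a fresh
// PHI value number at the block start (the getNextValue call below).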
-void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc) { +void LiveRangeCalc::updateSSA() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -238,7 +319,6 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); - VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index b8c8585..909829b 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -34,6 +34,11 @@ template <class NodeT> class DomTreeNodeBase; typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; class LiveRangeCalc { + const MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + MachineDominatorTree *DomTree; + VNInfo::Allocator *Alloc; + /// Seen - Bit vector of active entries in LiveOut, also used as a visited /// set by findReachingDefs. One entry per basic block, indexed by block /// number. This is kept as a separate bit vector because it can be cleared @@ -100,26 +105,27 @@ class LiveRangeCalc { /// to be live-in are added to LiveIn. If a unique reaching def is found, /// its value is returned, if Kill is jointly dominated by multiple values, /// NULL is returned. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *KillMBB, SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree); + unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. /// /// Every live-in block must be jointly dominated by the added live-out /// blocks. No values are read from the live ranges. - void updateSSA(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + void updateSSA(); /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI /// as a wildcard value for LiveIn entries without a value. - void updateLiveIns(VNInfo *VNI, SlotIndexes*); + void updateLiveIns(VNInfo *VNI); public: + LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + //===--------------------------------------------------------------------===// // High-level interface. //===--------------------------------------------------------------------===// @@ -132,14 +138,14 @@ public: /// that may overlap a previously computed live range, and before the first /// live range in a function. If live ranges are not known to be /// non-overlapping, call reset before each. - void reset(const MachineFunction *MF); + void reset(const MachineFunction *MF, + SlotIndexes*, + MachineDominatorTree*, + VNInfo::Allocator*); /// calculate - Calculate the live range of a virtual register from its defs /// and uses. LI must be empty with no values. - void calculate(LiveInterval *LI, - MachineRegisterInfo *MRI, - SlotIndexes *Indexes, - VNInfo::Allocator *Alloc); + void calculate(LiveInterval *LI); //===--------------------------------------------------------------------===// // Mid-level interface. @@ -154,21 +160,30 @@ public: /// Kill is not dominated by a single existing value, PHI-defs are inserted /// as required to preserve SSA form. If Kill is known to be dominated by a /// single existing value, Alloc may be null. 
- void extend(LiveInterval *LI, - SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + + /// createDeadDefs - Create a dead def in LI for every def operand of Reg. + /// Each instruction defining Reg gets a new VNInfo with a corresponding + /// minimal live range. + void createDeadDefs(LiveInterval *LI, unsigned Reg); - /// extendToUses - Extend the live range of LI to reach all uses. + /// createDeadDefs - Create a dead def in LI for every def of LI->reg. + void createDeadDefs(LiveInterval *LI) { + createDeadDefs(LI, LI->reg); + } + + /// extendToUses - Extend the live range of LI to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveInterval *LI, - MachineRegisterInfo *MRI, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + void extendToUses(LiveInterval *LI, unsigned Reg); + + /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. + void extendToUses(LiveInterval *LI) { + extendToUses(LI, LI->reg); + } //===--------------------------------------------------------------------===// // Low-level interface. //===--------------------------------------------------------------------===// @@ -216,9 +231,7 @@ public: /// /// Every predecessor of a live-in block must have been given a value with /// setLiveOutValue, the value may be null for live-through blocks. - void calculateValues(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + void calculateValues(); }; } // end namespace llvm diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 695f536..b4ce9aa 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -38,7 +38,7 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } LiveInterval &LI = LIS.getOrCreateInterval(VReg); - newRegs_.push_back(&LI); + NewRegs.push_back(&LI); return LI; } @@ -46,16 +46,16 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); - scannedRemattable_ = true; + ScannedRemattable = true; if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; - remattable_.insert(VNI); + Remattable.insert(VNI); return true; } void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { - for (LiveInterval::vni_iterator I = parent_.vni_begin(), - E = parent_.vni_end(); I != E; ++I) { + for (LiveInterval::vni_iterator I = getParent().vni_begin(), + E = getParent().vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; @@ -64,13 +64,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { continue; checkRematerializable(VNI, DefMI, aa); } - scannedRemattable_ = true; + ScannedRemattable = true; } bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { - if (!scannedRemattable_) + if (!ScannedRemattable) scanRemattable(aa); - return !remattable_.empty(); + return !Remattable.empty(); } /// allUsesAvailableAt - Return true if all registers used by OrigMI at @@ -82,12 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() ||
MO.isDef()) - continue; - // Reserved registers are OK. - if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) + if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; + // We can't remat physreg uses, unless it is a constant. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + continue; + return false; + } + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) @@ -101,10 +105,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, bool cheapAsAMove) { - assert(scannedRemattable_ && "Call anyRematerializable first"); + assert(ScannedRemattable && "Call anyRematerializable first"); // Use scanRemattable info. - if (!remattable_.count(RM.ParentVNI)) + if (!Remattable.count(RM.ParentVNI)) return false; // No defining instruction provided. @@ -136,13 +140,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, bool Late) { assert(RM.OrigMI && "Invalid remat"); TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); - rematted_.insert(RM.ParentVNI); + Rematted.insert(RM.ParentVNI); return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) .getRegSlot(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg) { - if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) + if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } @@ -173,6 +177,19 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (!DefMI || !UseMI) return false; + // Since we're moving the DefMI load, make sure we're not extending any live + // ranges. + if (!allUsesAvailableAt(DefMI, + LIS.getInstructionIndex(DefMI), + LIS.getInstructionIndex(UseMI))) + return false; + + // We also need to make sure it is safe to move the load. + // Assume there are stores between DefMI and UseMI. + bool SawStore = true; + if (!DefMI->isSafeToMove(&TII, 0, SawStore)) + return false; + DEBUG(dbgs() << "Try to fold single def: " << *DefMI << " into single use: " << *UseMI); @@ -220,14 +237,22 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + // Collect virtual registers to be erased after MI is gone. + SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; + // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + ReadsPhysRegs = true; continue; + } LiveInterval &LI = LIS.getInterval(Reg); // Shrink read registers, unless it is likely to be expensive and @@ -242,22 +267,49 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Remove defined value. 
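// (If the interval becomes empty, the register is only queued in
// RegsToErase; it is erased once MI itself is gone, as handled below.)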
if (MOI->isDef()) { if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (delegate_) - delegate_->LRE_WillShrinkVirtReg(LI.reg); + if (TheDelegate) + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); LI.removeValNo(VNI); - if (LI.empty()) { - ToShrink.remove(&LI); - eraseVirtReg(Reg); - } + if (LI.empty()) + RegsToErase.push_back(Reg); } } } - if (delegate_) - delegate_->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be <undef> + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } } if (ToShrink.empty()) @@ -268,8 +320,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; - if (delegate_) - delegate_->LRE_WillShrinkVirtReg(LI->reg); + if (TheDelegate) + TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; @@ -304,10 +356,14 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // interval must contain all the split products, and LI doesn't. if (IsOriginal) VRM->setIsSplitFromReg(Dups.back()->reg, 0); - if (delegate_) - delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + if (TheDelegate) + TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } ConEQ.Distribute(&Dups[0], MRI); + DEBUG({ + for (unsigned i = 0; i != NumComp; ++i) + dbgs() << '\t' << *Dups[i] << '\n'; + }); } } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp new file mode 100644 index 0000000..cdb1776 --- /dev/null +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -0,0 +1,152 @@ +//===-- LiveRegMatrix.cpp - Track register interference -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveRegMatrix analysis pass. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveRegMatrix.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); + +char LiveRegMatrix::ID = 0; +INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) + +LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID), + UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {} + +void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<LiveIntervals>(); + AU.addRequiredTransitive<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LIS = &getAnalysis<LiveIntervals>(); + VRM = &getAnalysis<VirtRegMap>(); + + unsigned NumRegUnits = TRI->getNumRegUnits(); + if (NumRegUnits != Matrix.size()) + Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]); + Matrix.init(LIUAlloc, NumRegUnits); + + // Make sure no stale queries get reused. + invalidateVirtRegs(); + return false; +} + +void LiveRegMatrix::releaseMemory() { + for (unsigned i = 0, e = Matrix.size(); i != e; ++i) { + Matrix[i].clear(); + Queries[i].clear(); + } +} + +void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << ':'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); + Matrix[*Units].unify(VirtReg); + } + ++NumAssigned; + DEBUG(dbgs() << '\n'); +} + +void LiveRegMatrix::unassign(LiveInterval &VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg.reg); + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << ':'); + VRM->clearVirt(VirtReg.reg); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); + Matrix[*Units].extract(VirtReg); + } + ++NumUnassigned; + DEBUG(dbgs() << '\n'); +} + +bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + // Check if the cached information is valid. + // The same BitVector can be reused for all PhysRegs. + // We could cache multiple VirtRegs if it becomes necessary. + if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) { + RegMaskVirtReg = VirtReg.reg; + RegMaskTag = UserTag; + RegMaskUsable.clear(); + LIS->checkRegMaskInterference(VirtReg, RegMaskUsable); + } + + // The BitVector is indexed by PhysReg, not register unit. + // Regmask interference is more fine grained than regunits. + // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8. 
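// That is: RegMaskUsable is non-empty only when VirtReg is known to cross at
// least one regmask, so a null PhysReg asks for any regmask crossing, while a
// concrete PhysReg asks whether some crossed mask fails to preserve it.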
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg)); +} + +bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + if (VirtReg.empty()) + return false; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (VirtReg.overlaps(LIS->getRegUnit(*Units))) + return true; + return false; +} + +LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg, + unsigned RegUnit) { + LiveIntervalUnion::Query &Q = Queries[RegUnit]; + Q.init(UserTag, &VirtReg, &Matrix[RegUnit]); + return Q; +} + +LiveRegMatrix::InterferenceKind +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { + if (VirtReg.empty()) + return IK_Free; + + // Regmask interference is the fastest check. + if (checkRegMaskInterference(VirtReg, PhysReg)) + return IK_RegMask; + + // Check for fixed interference. + if (checkRegUnitInterference(VirtReg, PhysReg)) + return IK_RegUnit; + + // Check the matrix for virtual register interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (query(VirtReg, *Units).checkInterference()) + return IK_VirtReg; + + return IK_Free; +} diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h new file mode 100644 index 0000000..b3e2d7f --- /dev/null +++ b/lib/CodeGen/LiveRegMatrix.h @@ -0,0 +1,148 @@ +//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRegMatrix analysis pass keeps track of virtual register interference +// along two dimensions: Slot indexes and register units. The matrix is used by +// register allocators to ensure that no interfering virtual registers get +// assigned to overlapping physical registers. +// +// Register units are defined in MCRegisterInfo.h; they represent the smallest +// unit of interference when dealing with overlapping physical registers. The +// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When +// a virtual register is assigned to a physical register, the live range for +// the virtual register is inserted into the LiveIntervalUnion for each regunit +// in the physreg. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H +#define LLVM_CODEGEN_LIVEREGMATRIX_H + +#include "LiveIntervalUnion.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class LiveInterval; +class LiveIntervalAnalysis; +class MachineRegisterInfo; +class TargetRegisterInfo; +class VirtRegMap; + +class LiveRegMatrix : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + VirtRegMap *VRM; + + // UserTag changes whenever virtual registers have been modified. + unsigned UserTag; + + // The matrix is represented as a LiveIntervalUnion per register unit. + LiveIntervalUnion::Allocator LIUAlloc; + LiveIntervalUnion::Array Matrix; + + // Cached queries per register unit. + OwningArrayPtr<LiveIntervalUnion::Query> Queries; + + // Cached register mask interference info. + unsigned RegMaskTag; + unsigned RegMaskVirtReg; + BitVector RegMaskUsable; + + // MachineFunctionPass boilerplate.
+ virtual void getAnalysisUsage(AnalysisUsage&) const; + virtual bool runOnMachineFunction(MachineFunction&); + virtual void releaseMemory(); +public: + static char ID; + LiveRegMatrix(); + + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Check for interference before assigning virtual registers to physical + // registers. + // + + /// Invalidate cached interference queries after modifying virtual register + /// live ranges. Interference checks may return stale information unless + /// caches are invalidated. + void invalidateVirtRegs() { ++UserTag; } + + enum InterferenceKind { + /// No interference, go ahead and assign. + IK_Free = 0, + + /// Virtual register interference. There are interfering virtual registers + /// assigned to PhysReg or its aliases. This interference could be resolved + /// by unassigning those other virtual registers. + IK_VirtReg, + + /// Register unit interference. A fixed live range is in the way, typically + /// argument registers for a call. This can't be resolved by unassigning + /// other virtual registers. + IK_RegUnit, + + /// RegMask interference. The live range is crossing an instruction with a + /// regmask operand that doesn't preserve PhysReg. This typically means + /// VirtReg is live across a call, and PhysReg isn't call-preserved. + IK_RegMask + }; + + /// Check for interference before assigning VirtReg to PhysReg. + /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). + /// When there is more than one kind of interference, the InterferenceKind + /// with the highest enum value is returned. + InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + + /// Assign VirtReg to PhysReg. + /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and + /// update VirtRegMap. The live range is expected to be available in PhysReg. + void assign(LiveInterval &VirtReg, unsigned PhysReg); + + /// Unassign VirtReg from its PhysReg. + /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes + /// the assignment and updates VirtRegMap accordingly. + void unassign(LiveInterval &VirtReg); + + //===--------------------------------------------------------------------===// + // Low-level interface. + //===--------------------------------------------------------------------===// + // + // Provide access to the underlying LiveIntervalUnions. + // + + /// Check for regmask interference only. + /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. + /// If PhysReg is null, check if VirtReg crosses any regmask operands. + bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); + + /// Check for regunit interference only. + /// Return true if VirtReg overlaps a fixed assignment of one of PhysReg's + /// register units. + bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg); + + /// Query a line of the assigned virtual register matrix directly. + /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg. + /// This returns a reference to an internal Query data structure that is only + /// valid until the next query() call. + LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit); + + /// Directly access the live interval unions per regunit. + /// This returns an array indexed by the regunit number.
+ LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; } +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEREGMATRIX_H diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 5a0d97d..348ed3a 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -192,8 +192,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; if (!Def) continue; @@ -216,9 +216,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { PartDefRegs.insert(DefReg); - for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg); - unsigned SubReg = *SubRegs; ++SubRegs) - PartDefRegs.insert(SubReg); + for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs) + PartDefRegs.insert(*SubRegs); } } return LastDef; @@ -247,8 +246,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); PhysRegDef[Reg] = LastPartialDef; SmallSet<unsigned, 8> Processed; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (Processed.count(SubReg)) continue; if (PartDefRegs.count(SubReg)) @@ -259,7 +258,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { false/*IsDef*/, true/*IsImp*/)); PhysRegDef[SubReg] = LastPartialDef; - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) Processed.insert(*SS); } } @@ -271,9 +270,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { // Remember this use. PhysRegUse[Reg] = MI; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - PhysRegUse[SubReg] = MI; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + PhysRegUse[*SubRegs] = MI; } /// FindLastRefOrPartRef - Return the last reference or partial reference of @@ -287,8 +285,8 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; unsigned LastPartDefDist = 0; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { // There was a def of this sub-register in between. This is a partial @@ -336,8 +334,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { MachineInstr *LastPartDef = 0; unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { // There was a def of this sub-register in between. 
This is a partial @@ -351,7 +349,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -367,8 +365,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // EAX<dead> = op AL<imp-def> // That is, EAX def is dead but AL def extends past it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (!PartUses.count(SubReg)) continue; bool NeedDef = true; @@ -388,11 +386,10 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); PhysRegUse[SubReg] = LastRefOrPartRef; - for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg); - unsigned SSReg = *SSRegs; ++SSRegs) - PhysRegUse[SSReg] = LastRefOrPartRef; + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + PhysRegUse[*SS] = LastRefOrPartRef; } - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) PartUses.erase(*SS); } } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { @@ -434,7 +431,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { // Kill the largest clobbered super-register. // This avoids needless implicit operands. unsigned Super = Reg; - for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) Super = *SR; HandlePhysRegKill(Super, 0); @@ -447,11 +444,11 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { Live.insert(Reg); - for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) - Live.insert(*SS); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Live.insert(*SubRegs); } else { - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; // If a register isn't itself defined, but all parts that make it up // are defined, then consider it also defined. // e.g. @@ -462,7 +459,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) Live.insert(*SS); } } @@ -472,8 +469,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, // is referenced. HandlePhysRegKill(Reg, MI); // Only some of the sub-registers are used. - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (!Live.count(SubReg)) // Skip if this sub-register isn't defined.
continue; @@ -491,8 +488,8 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; } @@ -576,7 +573,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = MO.getReg(); if (MO.isUse()) { MO.setIsKill(false); - UseRegs.push_back(MOReg); + if (MO.readsReg()) + UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { MO.setIsDead(false); DefRegs.push_back(MOReg); @@ -732,8 +730,9 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] - .push_back(BBI->getOperand(i).getReg()); + if (BBI->getOperand(i).readsReg()) + PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI->getOperand(i).getReg()); } bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 238bf52..fbc9e20 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -314,7 +314,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // No previously defined register was in range, so create a // new one. int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); - const TargetRegisterClass *RC = TRI->getPointerRegClass(); + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); DEBUG(dbgs() << " Materializing base register " << BaseReg << diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 1abb8f2..fa6b450 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. 
- N->RemoveRegOperandsFromUseLists(); + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(0); @@ -271,11 +272,9 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } - if (Alignment) { + if (Alignment) OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) << " bytes)"; - Comma = ", "; - } OS << '\n'; @@ -312,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { if (!succ_empty()) { if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } OS << '\n'; } } @@ -479,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) { void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New) { - uint32_t weight = 0; - succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + if (Old == New) + return; - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(SI); - weight = *WI; + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } } + assert(OldI != E && "Old is not a successor of this block"); + Old->removePredecessor(this); - // Update the successor information. - removeSuccessor(SI); - addSuccessor(New, weight); + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its weight instead of adding a duplicate edge. + if (!Weights.empty()) { + weight_iterator OldWI = getWeightIterator(OldI); + *getWeightIterator(NewI) += *OldWI; + Weights.erase(OldWI); + } + Successors.erase(OldI); } void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { @@ -509,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t weight = 0; - + uint32_t Weight = 0; // If Weight list is empty it means we don't use it (disabled optimization). if (!fromMBB->Weights.empty()) - weight = *fromMBB->Weights.begin(); + Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, weight); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); } } @@ -528,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t Weight = 0; + if (!fromMBB->Weights.empty()) + Weight = *fromMBB->Weights.begin(); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
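The replaceSuccessor rewrite above avoids duplicate CFG edges: when New is already a successor, Old's weight is folded into New's entry and Old's edge is dropped. A minimal standalone sketch of that policy follows; ToyBlock and its members are invented for illustration and are not the MachineBasicBlock API.

#include <cassert>
#include <cstdint>
#include <vector>

// Toy stand-in for a block's successor list with an optional weight list.
struct ToyBlock {
  std::vector<ToyBlock*> Succs;
  std::vector<uint32_t> Weights; // parallel to Succs; empty when unused

  void replaceSuccessor(ToyBlock *Old, ToyBlock *New) {
    if (Old == New)
      return;
    size_t E = Succs.size(), OldI = E, NewI = E;
    for (size_t I = 0; I != E; ++I) {
      if (Succs[I] == Old) OldI = I;
      if (Succs[I] == New) NewI = I;
    }
    assert(OldI != E && "Old is not a successor of this block");
    if (NewI == E) {        // New wasn't a successor: take Old's slot+weight.
      Succs[OldI] = New;
      return;
    }
    if (!Weights.empty()) { // Both present: merge weights, drop the old edge.
      Weights[NewI] += Weights[OldI];
      Weights.erase(Weights.begin() + OldI);
    }
    Succs.erase(Succs.begin() + OldI);
  }
};

int main() {
  ToyBlock A, B, C;
  A.Succs.push_back(&B);
  A.Succs.push_back(&C);
  A.Weights.push_back(3);
  A.Weights.push_back(5);
  A.replaceSuccessor(&B, &C); // expect a single edge to C with weight 3+5
  assert(A.Succs.size() == 1 && A.Succs[0] == &C && A.Weights[0] == 8);
  return 0;
}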
@@ -542,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } } +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); - return I != Successors.end(); + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -596,6 +627,11 @@ bool MachineBasicBlock::canFallThrough() { MachineBasicBlock * MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (Succ->isLandingPad()) + return NULL; + MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere @@ -670,7 +706,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Inherit live-ins from the successor for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), - E = Succ->livein_end(); I != E; ++I) + E = Succ->livein_end(); I != E; ++I) NMBB->addLiveIn(*I); // Update LiveVariables. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 5ba6851..c4dca2c 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -11,7 +11,7 @@ // structure and branch probability estimates. // // The pass strives to preserve the structure of the CFG (that is, retain -// a topological ordering of basic blocks) in the absense of a *strong* signal +// a topological ordering of basic blocks) in the absence of a *strong* signal // to the contrary from probabilities. However, within the CFG structure, it // attempts to choose an ordering which favors placing more likely sequences of // blocks adjacent to each other. @@ -63,17 +63,13 @@ namespace { /// /// This is the datastructure representing a chain of consecutive blocks that /// are profitable to layout together in order to maximize fallthrough -/// probabilities. We also can use a block chain to represent a sequence of -/// basic blocks which have some external (correctness) requirement for -/// sequential layout. +/// probabilities and code locality. We also can use a block chain to represent +/// a sequence of basic blocks which have some external (correctness) +/// requirement for sequential layout. /// -/// Eventually, the block chains will form a directed graph over the function. -/// We provide an SCC-supporting-iterator in order to quicky build and walk the -/// SCCs of block chains within a function. -/// -/// The block chains also have support for calculating and caching probability -/// information related to the chain itself versus other chains. This is used -/// for ranking during the final layout of block chains. +/// Chains can be built around a single basic block and can be merged to grow +/// them. They participate in a block-to-chain mapping, which is updated +/// automatically as chains are merged together. class BlockChain { /// \brief The sequence of blocks belonging to this chain. /// @@ -179,10 +175,11 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief Allocator and owner of BlockChain structures. 
/// - /// We build BlockChains lazily by merging together high probability BB - /// sequences acording to the "Algo2" in the paper mentioned at the top of - /// the file. To reduce malloc traffic, we allocate them using this slab-like - /// allocator, and destroy them after the pass completes. + /// We build BlockChains lazily while processing the loop structure of + /// a function. To reduce malloc traffic, we allocate them using this + /// slab-like allocator, and destroy them after the pass completes. An + /// important guarantee is that this allocator produces stable pointers to + /// the chains. SpecificBumpPtrAllocator<BlockChain> ChainAllocator; /// \brief Function wide BasicBlock to BlockChain mapping. @@ -329,7 +326,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle // definition of edge weight semantics is encoded here as well. We should - // improve the MBPI interface to effeciently support query patterns such as + // improve the MBPI interface to efficiently support query patterns such as // this. uint32_t BestWeight = 0; uint32_t WeightScale = 0; @@ -988,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + } PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -1000,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. 
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } @@ -1053,7 +1098,7 @@ namespace { /// /// A separate pass to compute interesting statistics for evaluating block /// placement. This is separate from the actual placement pass so that they can -/// be computed in the absense of any placement transformations or when using +/// be computed in the absence of any placement transformations or when using /// alternative placement strategies. class MachineBlockPlacementStats : public MachineFunctionPass { /// \brief A handle to the branch probability pass.
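To see how the three frequency tests in the alignment hunk above interact, here is a self-contained recomputation with ColdProb = 1/5 (20%). Plain doubles stand in for BlockFrequency and BranchProbability, and shouldAlign with all its parameters is invented for illustration; this is a sketch of the heuristic, not the LLVM API.

#include <cstdio>

// Mirrors the decision above: align only loop blocks that are hot relative
// to both the function entry and their loop header, and whose
// layout-predecessor (fallthrough) edge is cold relative to the block.
static bool shouldAlign(bool InLoop, double Freq, double EntryFreq,
                        double HeaderFreq, bool HasLayoutFallthrough,
                        double LayoutEdgeFreq) {
  const double ColdProb = 0.20; // BranchProbability(1, 5)
  if (!InLoop)
    return false;                 // non-looping blocks rarely repay alignment
  if (Freq < EntryFreq * ColdProb)
    return false;                 // cold relative to the function entry
  if (Freq < HeaderFreq * ColdProb)
    return false;                 // cold relative to the loop header
  if (!HasLayoutFallthrough)
    return true;                  // every entry is a jump: force alignment
  return LayoutEdgeFreq <= Freq * ColdProb; // fallthrough edge is cold
}

int main() {
  // A loop block entered almost entirely by jumps gets aligned...
  std::printf("%d\n", shouldAlign(true, 100.0, 50.0, 120.0, true, 10.0)); // 1
  // ...but not when most of its entries fall through from the layout pred.
  std::printf("%d\n", shouldAlign(true, 100.0, 50.0, 120.0, true, 90.0)); // 0
  return 0;
}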
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index a63688e..896461f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -84,7 +84,7 @@ namespace { bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const ; + MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, @@ -100,8 +100,7 @@ namespace { void ExitScope(MachineBasicBlock *MBB); bool ProcessBlock(MachineBasicBlock *MBB); void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace @@ -216,11 +215,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - PhysRefs.insert(Reg); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - PhysRefs.insert(*Alias); } return !PhysRefs.empty(); @@ -326,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet<MachineInstr*, 8> CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -396,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -437,7 +461,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. bool CrossMBBPhysDef = false; - SmallSet<unsigned,8> PhysRefs; + SmallSet<unsigned, 8> PhysRefs; SmallVector<unsigned, 2> PhysDefs; if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; @@ -465,21 +489,31 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. 
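// Even with a matching value number found, the register-class and cost
// checks below can still veto the rewrite (DoCSE goes false).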
bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI; if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); DoCSE = false; break; } @@ -488,6 +522,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // within the register class of the new instruction. const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); if (!MRI->constrainRegClass(NewReg, OldRC)) { + DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); DoCSE = false; break; } @@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI; if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -522,11 +562,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumCommutes; Changed = true; } else { - DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; @@ -537,8 +577,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { /// up the dominator tree to destroy ancestors which are now done. void MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) { if (OpenChildren[Node]) return; @@ -546,7 +585,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, ExitScope(Node->getBlock()); // Now traverse upwards to pop ancestors whose offspring are all done.
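// (The walk follows the immediate-dominator links via Node->getIDom().)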
- while (MachineDomTreeNode *Parent = ParentMap[Node]) { + while (MachineDomTreeNode *Parent = Node->getIDom()) { unsigned Left = --OpenChildren[Parent]; if (Left != 0) break; @@ -558,7 +597,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; CurrVN = 0; @@ -573,7 +611,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { OpenChildren[Node] = NumChildren; for (unsigned i = 0; i != NumChildren; ++i) { MachineDomTreeNode *Child = Children[i]; - ParentMap[Child] = Node; WorkList.push_back(Child); } } while (!WorkList.empty()); @@ -586,7 +623,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { EnterScope(MBB); Changed |= ProcessBlock(MBB); // If it's a leaf node, it's done. Traverse upwards to pop ancestors. - ExitScopeIfDone(Node, OpenChildren, ParentMap); + ExitScopeIfDone(Node, OpenChildren); } return Changed; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9730eaa..bac3aa2 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -62,28 +62,16 @@ void MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, SourceMap &SrcMap, DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { - SourceMap::iterator SI = SrcMap.find(Reg); - if (SI != SrcMap.end()) { - const DestList& Defs = SI->second; - for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); - I != E; ++I) { - unsigned MappedDef = *I; - // Source of copy is no longer available for propagation. - if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) - AvailCopyMap.erase(*SR); - } - } - } - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - SI = SrcMap.find(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + SourceMap::iterator SI = SrcMap.find(*AI); if (SI != SrcMap.end()) { const DestList& Defs = SI->second; for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); I != E; ++I) { unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) AvailCopyMap.erase(*SR); } } @@ -188,11 +176,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } // If Src is defined by a previous copy, it cannot be eliminated. - CI = CopyMap.find(Src); - if (CI != CopyMap.end()) - MaybeDeadCopies.remove(CI->second); - for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) { - CI = CopyMap.find(*AS); + for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { + CI = CopyMap.find(*AI); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); } @@ -211,13 +196,13 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember Def is defined by the copy. // ... Make sure to clear the def maps of aliases first. 
- for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) { - CopyMap.erase(*AS); - AvailCopyMap.erase(*AS); + for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); } CopyMap[Def] = MI; AvailCopyMap[Def] = MI; - for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) { + for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) { CopyMap[*SR] = MI; AvailCopyMap[*SR] = MI; } @@ -256,11 +241,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. - DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg); - if (CI != CopyMap.end()) - MaybeDeadCopies.remove(CI->second); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - CI = CopyMap.find(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); } @@ -296,11 +278,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { unsigned Reg = Defs[i]; // No longer defined by a copy. - CopyMap.erase(Reg); - AvailCopyMap.erase(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - CopyMap.erase(*AS); - AvailCopyMap.erase(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); } // If 'Reg' is previously source of a copy, it is no longer available for diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d8c2f6a..d4aede8a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -60,7 +60,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); if (Fn->hasFnAttr(Attribute::StackAlignment)) - FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( + FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); @@ -84,9 +84,13 @@ MachineFunction::~MachineFunction() { MFInfo->~MachineFunctionInfo(); Allocator.Deallocate(MFInfo); } - FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo); - ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool); - + + FrameInfo->~MachineFrameInfo(); + Allocator.Deallocate(FrameInfo); + + ConstantPool->~MachineConstantPool(); + Allocator.Deallocate(ConstantPool); + if (JumpTableInfo) { JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); @@ -98,7 +102,7 @@ MachineFunction::~MachineFunction() { MachineJumpTableInfo *MachineFunction:: getOrCreateJumpTableInfo(unsigned EntryKind) { if (JumpTableInfo) return JumpTableInfo; - + 
JumpTableInfo = new (Allocator) MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); return JumpTableInfo; @@ -116,12 +120,12 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBI = begin(); else MBBI = MBB; - + // Figure out the block number this should have. unsigned BlockNo = 0; if (MBBI != begin()) BlockNo = prior(MBBI)->getNumber()+1; - + for (; MBBI != E; ++MBBI, ++BlockNo) { if (MBBI->getNumber() != (int)BlockNo) { // Remove use of the old number. @@ -130,7 +134,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { "MBB number mismatch!"); MBBNumbering[MBBI->getNumber()] = 0; } - + // If BlockNo is already taken, set that block's number to -1. if (MBBNumbering[BlockNo]) MBBNumbering[BlockNo]->setNumber(-1); @@ -138,7 +142,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumbering[BlockNo] = MBBI; MBBI->setNumber(BlockNo); } - } + } // Okay, all the blocks are renumbered. If we have compactified the block // numbering, shrink MBBNumbering now. @@ -295,16 +299,16 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { // Print Frame Information FrameInfo->print(*this, OS); - + // Print JumpTable Information if (JumpTableInfo) JumpTableInfo->print(OS); // Print Constant Pool ConstantPool->print(OS); - + const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); - + if (RegInfo && !RegInfo->livein_empty()) { OS << "Function Live Ins: "; for (MachineRegisterInfo::livein_iterator @@ -324,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } - + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; BB->print(OS, Indexes); @@ -411,10 +415,9 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { assert(JumpTableInfo && "No jump tables"); - assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); - + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : MAI.getPrivateGlobalPrefix(); SmallString<60> Name; @@ -691,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, const_cast<Constant*>(B), TD); - + return A == B; } @@ -714,7 +717,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, Constants[i].Alignment = Alignment; return i; } - + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); return Constants.size()-1; } @@ -723,7 +726,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; - + // Check to see if we already have this constant. // // FIXME, this could be made much more efficient for large constant pools. 
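Editor's aside (illustrative sketch, not part of the import): the destructor cleanup in the MachineFunction.cpp hunks above follows the usual BumpPtrAllocator idiom of running a destructor explicitly and then returning the storage to the pool. A generic form of the pattern, with T and AllocatorT standing in for the concrete types:

template <typename T, typename AllocatorT>
static void destroyAndFree(T *Obj, AllocatorT &Allocator) {
  Obj->~T();                  // object was placement-new'd: run the dtor by hand
  Allocator.Deallocate(Obj);  // then hand the raw storage back to the allocator
}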
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 2aaa798..0102ac7 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -14,7 +14,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -28,6 +30,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { raw_ostream &OS; const std::string Banner; + MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { } MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) : MachineFunctionPass(ID), OS(os), Banner(banner) {} @@ -40,7 +43,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) { OS << "# " << Banner << ":\n"; - MF.print(OS); + MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); return false; } }; @@ -48,6 +51,10 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { char MachineFunctionPrinterPass::ID = 0; } +char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; +INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", + "Machine Function Printer", false, false) + namespace llvm { /// Returns a newly-created MachineFunction Printer pass. The /// default banner is empty. diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e553a04..b166849 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" @@ -33,7 +34,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" @@ -47,55 +47,6 @@ using namespace llvm; // MachineOperand Implementation //===----------------------------------------------------------------------===// -/// AddRegOperandToRegInfo - Add this register operand to the specified -/// MachineRegisterInfo. If it is null, then the next/prev fields should be -/// explicitly nulled out. -void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { - assert(isReg() && "Can only add reg operand to use lists"); - - // If the reginfo pointer is null, just explicitly null out or next/prev - // pointers, to ensure they are not garbage. - if (RegInfo == 0) { - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; - return; - } - - // Otherwise, add this operand to the head of the registers use/def list. - MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - - // For SSA values, we prefer to keep the definition at the start of the list. - // we do this by skipping over the definition if it is at the head of the - // list. 
- if (*Head && (*Head)->isDef()) - Head = &(*Head)->Contents.Reg.Next; - - Contents.Reg.Next = *Head; - if (Contents.Reg.Next) { - assert(getReg() == Contents.Reg.Next->getReg() && - "Different regs on the same list!"); - Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; - } - - Contents.Reg.Prev = Head; - *Head = this; -} - -/// RemoveRegOperandFromRegInfo - Remove this register operand from the -/// MachineRegisterInfo it is linked with. -void MachineOperand::RemoveRegOperandFromRegInfo() { - assert(isOnRegUseList() && "Reg operand is not on a use list"); - // Unlink this from the doubly linked list of operands. - MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; - if (NextOp) { - assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); - NextOp->Contents.Reg.Prev = Contents.Reg.Prev; - } - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; -} - void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. @@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { - RemoveRegOperandFromRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); SmallContents.RegNo = Reg; - AddRegOperandToRegInfo(&MF->getRegInfo()); + MRI.addRegOperandToUseList(this); return; } @@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { setReg(Reg); } +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. - if (isReg() && getParent() && getParent()->getParent() && - getParent()->getParent()->getParent()) - RemoveRegOperandFromRegInfo(); + if (isReg() && isOnRegUseList()) + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; @@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + MachineRegisterInfo *RegInfo = 0; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the // register's use/def lists. 
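// [Editor's note; illustration only.] The new setIsDef() above must re-link
// the operand rather than merely flip the bit: as the MachineRegisterInfo.cpp
// hunks later in this diff show, defs are kept ahead of uses on each
// register's use-def list, so a def<->use flip changes the operand's required
// list position. A hedged usage sketch, assuming MO is a register operand of
// an instruction that lives inside a machine function:
static void convertUseToDef(MachineOperand &MO) {
  // setIsDef() removes MO from its register's list, flips IsDef, and
  // re-inserts it -- defs go to the front, uses to the back.
  MO.setIsDef(true);
}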
- if (isReg()) { - assert(!isEarlyClobber()); - setReg(Reg); - } else { - // Otherwise, change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - AddRegOperandToRegInfo(&MF->getRegInfo()); - } + if (RegInfo && isReg()) + RegInfo->removeRegOperandFromUseList(this); + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -182,11 +151,18 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; - SubReg = 0; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); } /// isIdenticalTo - Return true if this operand is identical to the specified -/// operand. +/// operand. Note that this should stay in sync with the hash_value overload +/// below. bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { if (getType() != Other.getType() || getTargetFlags() != Other.getTargetFlags()) @@ -207,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_FrameIndex: return getIndex() == Other.getIndex(); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); case MachineOperand::MO_JumpTableIndex: return getIndex() == Other.getIndex(); @@ -227,6 +204,47 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { llvm_unreachable("Invalid machine operand type"); } +// Note: this must stay exactly in sync with isIdenticalTo above. 
+hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), + MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getBlockAddress()); + case MachineOperand::MO_RegisterMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + } + llvm_unreachable("Invalid machine operand type"); +} + /// print - Print the specified machine operand. /// void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -255,12 +273,16 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "imp-"; OS << "def"; NeedComma = true; + // <def,read-undef> only makes sense when getSubReg() is set. + // Don't clutter the output otherwise. + if (isUndef() && getSubReg()) + OS << ",read-undef"; } else if (isImplicit()) { OS << "imp-use"; NeedComma = true; } - if (isKill() || isDead() || isUndef() || isInternalRead()) { + if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) { if (NeedComma) OS << ','; NeedComma = false; if (isKill()) { @@ -271,7 +293,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "dead"; NeedComma = true; } - if (isUndef()) { + if (isUndef() && isUse()) { if (NeedComma) OS << ','; OS << "undef"; NeedComma = true; @@ -308,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; + case MachineOperand::MO_TargetIndex: + OS << "<ti#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; case MachineOperand::MO_JumpTableIndex: OS << "<jt#" << getIndex() << '>'; break; @@ -605,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. 
-void MachineInstr::RemoveRegOperandsFromUseLists() { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); - } + MRI.removeRegOperandFromUseList(&Operands[i]); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. -void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(&RegInfo); - } + MRI.addRegOperandToUseList(&Operands[i]); } - /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list @@ -650,13 +674,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; if (RegInfo) - Operands[OpNo].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). - assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) && + // RegMask operands go between the explicit and implicit operands. + assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || + OpNo < MCID->getNumOperands()) && "Trying to add an operand to a machine instr that is already done!"); // All operands from OpNo have been removed from RegInfo. If the Operands @@ -665,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); // Insert the new operand at OpNo. Operands.insert(Operands.begin() + OpNo, Op); @@ -676,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); // When adding a register operand, tell RegInfo about it. if (Operands[OpNo].isReg()) { - // Add the new operand to RegInfo, even when RegInfo is NULL. - // This will initialize the linked list pointers. - Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // Ensure isOnRegUseList() returns false, regardless of Op's status. + Operands[OpNo].Contents.Reg.Prev = 0; + // Add the new operand to RegInfo. + if (RegInfo) + RegInfo->addRegOperandToUseList(&Operands[OpNo]); // If the register operand is flagged as early, mark the operand as such. 
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); @@ -692,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (RegInfo) { for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -702,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. - if (Operands.back().isReg() && Operands.back().isOnRegUseList()) - Operands.back().RemoveRegOperandFromRegInfo(); + if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) + RegInfo->removeRegOperandFromUseList(&Operands.back()); Operands.pop_back(); return; @@ -716,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { // Otherwise, we are removing an interior operand. If we have reginfo to // update, remove all operands that will be shifted down from their reg lists, // move everything down, then re-add them. - MachineRegisterInfo *RegInfo = getRegInfo(); if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); } } @@ -729,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -868,7 +896,8 @@ void MachineInstr::eraseFromParent() { MBB->erase(MI); } } - getParent()->erase(this); + // Erase the individual instruction, which may itself be inside a bundle. + getParent()->erase_instr(this); } @@ -938,9 +967,13 @@ const TargetRegisterClass* MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + assert(getParent() && "Can't have an MBB reference here!"); + assert(getParent()->getParent() && "Can't have an MF reference here!"); + const MachineFunction &MF = *getParent()->getParent(); + // Most opcodes have fixed constraints in their MCInstrDesc. if (!isInlineAsm()) - return TII->getRegClass(getDesc(), OpIdx, TRI); + return TII->getRegClass(getDesc(), OpIdx, TRI, MF); if (!getOperand(OpIdx).isReg()) return NULL; @@ -962,7 +995,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, // Assume that all registers in a memory operand are pointers. 
if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem) - return TRI->getPointerRegClass(); + return TRI->getPointerRegClass(MF); return NULL; } @@ -1530,12 +1563,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; - for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg); - unsigned AliasReg = *Alias; ++Alias) + for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); + AI.isValid(); ++AI) { + unsigned AliasReg = *AI; if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { HasAliasLive = true; break; } + } if (!HasAliasLive) { OmittedAnyCallClobbers = true; continue; @@ -1667,7 +1702,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); - bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { @@ -1739,7 +1775,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); - bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { @@ -1758,9 +1795,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, // There exists a super-register that's marked dead. if (RegInfo->isSuperRegister(IncomingReg, Reg)) return true; - if (RegInfo->getSubRegisters(IncomingReg) && - RegInfo->getSuperRegisters(Reg) && - RegInfo->isSubRegister(IncomingReg, Reg)) + if (RegInfo->isSubRegister(IncomingReg, Reg)) DeadOps.push_back(i); } } @@ -1841,52 +1876,16 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { // Build up a buffer of hash code components. - // - // FIXME: This is a total hack. We should have a hash_value overload for - // MachineOperand, but currently that doesn't work because there are many - // different ideas of "equality" and thus different sets of information that - // contribute to the hash code. This one happens to want to take a specific - // subset. And it's still not clear that this routine uses the *correct* - // subset of information when computing the hash code. The goal is to use the - // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to - // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would - // be very useful to factor the selection of relevant inputs out of the two - // functions and into a common routine, but it's not clear how that can be - // done. 
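// [Editor's note; illustration only.] The rewrite below can delete the big
// per-operand switch because the hash_value(const MachineOperand&) overload
// added earlier in this file now supplies the per-operand hash. Both build on
// llvm/ADT/Hashing.h; a hedged sketch of the two primitives involved:
static size_t hashOpcodeAndOperandHashes(unsigned Opcode,
                                         ArrayRef<size_t> Components) {
  // hash_combine folds a fixed list of hashable values into one hash_code;
  // hash_combine_range folds an arbitrary sequence.
  return hash_combine(Opcode,
                      hash_combine_range(Components.begin(),
                                         Components.end()));
}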
SmallVector<size_t, 8> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - switch (MO.getType()) { - default: break; - case MachineOperand::MO_Register: - if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; // Skip virtual register defs. - HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); - break; - case MachineOperand::MO_Immediate: - HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); - break; - case MachineOperand::MO_FrameIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_JumpTableIndex: - HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); - break; - case MachineOperand::MO_MachineBasicBlock: - HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); - break; - case MachineOperand::MO_GlobalAddress: - HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); - break; - case MachineOperand::MO_BlockAddress: - HashComponents.push_back(hash_combine(MO.getType(), - MO.getBlockAddress())); - break; - case MachineOperand::MO_MCSymbol: - HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); - break; - } + if (MO.isReg() && MO.isDef() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. + + HashComponents.push_back(hash_value(MO)); } return hash_combine_range(HashComponents.begin(), HashComponents.end()); } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 73489a7..b7de7bf 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -169,8 +169,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } if (!MO.isDead()) { - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (LocalDefSet.insert(SubReg)) LocalDefs.push_back(SubReg); } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 8c562cc..efec481 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -445,8 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } if (MO.isImplicit()) { - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - PhysRegClobbers.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRegClobbers.set(*AI); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -465,7 +465,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, // If we have already seen another instruction that defines the same // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. 
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { + for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); if (PhysRegClobbers.test(*AS)) @@ -517,8 +517,8 @@ void MachineLICM::HoistRegionPostRA() { for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - PhysRegDefs.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRegDefs.set(*AI); } SpeculationState = SpeculateUnknown; @@ -540,8 +540,8 @@ void MachineLICM::HoistRegionPostRA() { unsigned Reg = MO.getReg(); if (!Reg) continue; - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - TermRegs.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + TermRegs.set(*AI); } } @@ -1260,11 +1260,11 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (NewOpc == 0) return 0; const MCInstrDesc &MID = TII->get(NewOpc); if (MID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); - MachineFunction &MF = *MI->getParent()->getParent(); SmallVector<MachineInstr *, 2> NewMIs; bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 189cb2b..9f3829e 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -9,7 +9,7 @@ // // This file defines the MachineLoopInfo class that is used to identify natural // loops and determine the loop depth of various nodes of the CFG. Note that -// the loops identified may actually be several natural loops that share the +// the loops identified may actually be several natural loops that share the // same header node... not just a single natural loop. // //===----------------------------------------------------------------------===// @@ -17,17 +17,13 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Support/Debug.h" using namespace llvm; -namespace llvm { -#define MLB class LoopBase<MachineBasicBlock, MachineLoop> -TEMPLATE_INSTANTIATION(MLB); -#undef MLB -#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop> -TEMPLATE_INSTANTIATION(MLIB); -#undef MLIB -} +// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. 
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; +template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; char MachineLoopInfo::ID = 0; INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", @@ -40,7 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update + LI.Analyze(getAnalysis<MachineDominatorTree>().getBase()); return false; } diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index 58e067b..cb204fd 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -18,6 +18,19 @@ using namespace llvm; void MachinePassRegistryListener::anchor() { } +/// setDefault - Set the default constructor by name. +void MachinePassRegistry::setDefault(StringRef Name) { + MachinePassCtor Ctor = 0; + for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { + if (R->getName() == Name) { + Ctor = R->getCtor(); + break; + } + } + assert(Ctor && "Unregistered pass name"); + setDefault(Ctor); +} + /// Add - Adds a function pass to the registration list. /// void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 7ea1517..5fb938f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // New virtual register number. unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - - // Add a reg, but keep track of whether the vector reallocated or not. - const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); - void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); - - if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) - // The vector reallocated, handle this now. - HandleVRegListReallocation(); return Reg; } @@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() { VRegInfo.clear(); } -/// HandleVRegListReallocation - We just added a virtual register to the -/// VRegInfo info list and it reallocated. Update the use/def lists info -/// pointers. -void MachineRegisterInfo::HandleVRegListReallocation() { - // The back pointers for the vreg lists point into the previous vector. - // Update them to point to their correct slots. - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - MachineOperand *List = VRegInfo[Reg].second; - if (!List) continue; - // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[Reg].second; +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. 
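// [Editor's note; illustration only.] The list maintained below is
// NULL-terminated in the Next direction but circular in the Prev direction,
// so the tail stays reachable from the head in O(1). Schematically, for three
// operands A, B, C on one register's list:
//
//   Head -> A -> B -> C -> NULL           (Next links)
//   A.Prev == C, B.Prev == A, C.Prev == B (Prev links; Head->Prev is the tail)
//
// This is what lets addRegOperandToUseList append a use without walking the
// list, while still keeping defs grouped at the front so def_iterator can
// stop early.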
+ if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. + MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; } } +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -162,14 +201,20 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. def_iterator I = def_begin(Reg); + assert((I.atEnd() || llvm::next(I) == def_end()) && + "getVRegDef assumes a single definition or no definition"); return !I.atEnd() ? &*I : 0; } -bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); +/// getUniqueVRegDef - Return the unique machine instr that defines the +/// specified virtual register or null if none is found. If there are +/// multiple definitions or no definition, return null. +MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { + if (def_empty(Reg)) return 0; + def_iterator I = def_begin(Reg); + if (llvm::next(I) != def_end()) + return 0; + return &*I; } bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { @@ -268,15 +313,15 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); // Check if any overlapping register is modified. - for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) - if (!def_empty(*R)) + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + if (!def_empty(*AI)) return false; // Check if any overlapping register is allocatable so it may be used later. 
if (AllocatableRegs.empty()) AllocatableRegs = TRI->getAllocatableSet(MF); - for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) - if (AllocatableRegs.test(*R)) + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + if (AllocatableRegs.test(*AI)) return false; return true; } diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 070a557..076547a 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, } MachineSSAUpdater::~MachineSSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -241,30 +241,6 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { I->second = NewReg; } -/// MachinePHIiter - Iterator for PHI operands. This is used for the -/// PHI_iterator in the SSAUpdaterImpl template. -namespace { - class MachinePHIiter { - private: - MachineInstr *PHI; - unsigned idx; - - public: - explicit MachinePHIiter(MachineInstr *P) // begin iterator - : PHI(P), idx(1) {} - MachinePHIiter(MachineInstr *P, bool) // end iterator - : PHI(P), idx(PHI->getNumOperands()) {} - - MachinePHIiter &operator++() { idx += 2; return *this; } - bool operator==(const MachinePHIiter& x) const { return idx == x.idx; } - bool operator!=(const MachinePHIiter& x) const { return !operator==(x); } - unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } - MachineBasicBlock *getIncomingBlock() { - return PHI->getOperand(idx+1).getMBB(); - } - }; -} - /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl /// template, specialized for MachineSSAUpdater. namespace llvm { @@ -279,7 +255,26 @@ public: static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } - typedef MachinePHIiter PHI_iterator; + /// Iterator for PHI operands. 
+ class PHI_iterator { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit PHI_iterator(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + PHI_iterator(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + PHI_iterator &operator++() { idx += 2; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } static inline PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 1d3241b..a1dc948 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -17,9 +17,13 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -50,6 +54,15 @@ static bool ViewMISchedDAGs = false; // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// +MachineSchedContext::MachineSchedContext(): + MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { + RegClassInfo = new RegisterClassInfo(); +} + +MachineSchedContext::~MachineSchedContext() { + delete RegClassInfo; +} + namespace { /// MachineScheduler runs after coalescing and before register allocation. class MachineScheduler : public MachineSchedContext, @@ -122,6 +135,29 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Decrement this iterator until reaching the top or a non-debug instr. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { + assert(I != Beg && "reached the top of the region, cannot decrement"); + while (--I != Beg) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + /// Top-level MachineScheduler pass driver. /// /// Visit blocks in function order. Divide each block into scheduling regions @@ -139,6 +175,8 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + // Initialize the context of the pass. 
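// [Editor's note; illustration only.] The static helpers defined above let
// the scheduler treat DBG_VALUE instructions as transparent when moving the
// region bounds. A hedged sketch of how they compose, mirroring their use in
// ScheduleDAGMI::schedule() further down:
static MachineInstr *firstRealInstr(MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End) {
  // nextIfDebug() steps past any leading DBG_VALUEs, so a debug instruction
  // is never mistaken for the top of the scheduling region.
  MachineBasicBlock::iterator I = nextIfDebug(Begin, End);
  return I == End ? 0 : &*I;
}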
MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); @@ -149,6 +187,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LIS = &getAnalysis<LiveIntervals>(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + RegClassInfo->runOnMachineFunction(*MF); + // Select the scheduler, or set the default. MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; if (Ctor == useDefaultMachineSched) { @@ -163,13 +203,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); // Visit all machine basic blocks. + // + // TODO: Visit blocks in global postorder or postorder within the bottom-up + // loop tree. Then we can optionally compute global RegPressure. for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { Scheduler->startBlock(MBB); // Break the block into scheduling regions [I, RegionEnd), and schedule each - // region as soon as it is discovered. RegionEnd points the the scheduling + // region as soon as it is discovered. RegionEnd points the scheduling // boundary at the bottom of the region. The DAG does not include RegionEnd, // but the region does (i.e. the next RegionEnd is above the previous // RegionBegin). If the current block has no terminator then RegionEnd == @@ -181,6 +224,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { unsigned RemainingCount = MBB->size(); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { @@ -207,7 +251,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->exitRegion(); continue; } - DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << MF->getFunction()->getName() << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; @@ -260,6 +305,9 @@ public: /// be scheduled at the bottom. virtual SUnit *pickNode(bool &IsTopNode) = 0; + /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node. + virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; + /// When all predecessor dependencies have been resolved, free this node for /// top-down scheduling. virtual void releaseTopNode(SUnit *SU) = 0; @@ -279,22 +327,45 @@ namespace { /// machine instructions while updating LiveIntervals. class ScheduleDAGMI : public ScheduleDAGInstrs { AliasAnalysis *AA; + RegisterClassInfo *RegClassInfo; MachineSchedStrategy *SchedImpl; + MachineBasicBlock::iterator LiveRegionEnd; + + /// Register pressure in this region computed by buildSchedGraph. + IntervalPressure RegPressure; + RegPressureTracker RPTracker; + + /// List of pressure sets that exceed the target's pressure limit before + /// scheduling, listed in increasing set ID order. Each pressure set is paired + /// with its max pressure in the currently scheduled regions. + std::vector<PressureElement> RegionCriticalPSets; + /// The top of the unscheduled zone. MachineBasicBlock::iterator CurrentTop; + IntervalPressure TopPressure; + RegPressureTracker TopRPTracker; /// The bottom of the unscheduled zone. 
MachineBasicBlock::iterator CurrentBottom; + IntervalPressure BotPressure; + RegPressureTracker BotRPTracker; +#ifndef NDEBUG /// The number of instructions scheduled so far. Used to cut off the /// scheduler at the point determined by misched-cutoff. unsigned NumInstrsScheduled; +#endif public: ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), - AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), - NumInstrsScheduled(0) {} + AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), + RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), + CurrentBottom(), BotRPTracker(BotPressure) { +#ifndef NDEBUG + NumInstrsScheduled = 0; +#endif + } ~ScheduleDAGMI() { delete SchedImpl; @@ -303,22 +374,68 @@ public: MachineBasicBlock::iterator top() const { return CurrentTop; } MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - /// Implement ScheduleDAGInstrs interface. + /// Implement the ScheduleDAGInstrs interface for handling the next scheduling + /// region. This covers all instructions in a block, while schedule() may only + /// cover a subset. + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Implement ScheduleDAGInstrs interface for scheduling a sequence of + /// reorderable instructions. void schedule(); + /// Get current register pressure for the top scheduled instructions. + const IntervalPressure &getTopPressure() const { return TopPressure; } + const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } + + /// Get current register pressure for the bottom scheduled instructions. + const IntervalPressure &getBotPressure() const { return BotPressure; } + const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } + + /// Get register pressure for the entire scheduling region before scheduling. + const IntervalPressure &getRegPressure() const { return RegPressure; } + + const std::vector<PressureElement> &getRegionCriticalPSets() const { + return RegionCriticalPSets; + } + + /// getIssueWidth - Return the max instructions per scheduling group. + unsigned getIssueWidth() const { + return (InstrItins && InstrItins->SchedModel) + ? InstrItins->SchedModel->IssueWidth : 1; + } + + /// getNumMicroOps - Return the number of issue slots required for this MI. + unsigned getNumMicroOps(MachineInstr *MI) const { + if (!InstrItins) return 1; + int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI); + } + protected: + void initRegPressure(); + void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); bool checkSchedLimit(); + void releaseRoots(); + void releaseSucc(SUnit *SU, SDep *SuccEdge); void releaseSuccessors(SUnit *SU); void releasePred(SUnit *SU, SDep *PredEdge); void releasePredecessors(SUnit *SU); + + void placeDebugValues(); }; } // namespace /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. +/// +/// FIXME: Adjust SuccSU height based on MinLatency. void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); @@ -345,6 +462,8 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. 
When /// NumSuccsLeft reaches zero, release the predecessor node. +/// +/// FIXME: Adjust PredSU height based on MinLatency. void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); @@ -371,12 +490,17 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { void ScheduleDAGMI::moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { - // Fix RegionBegin if the first instruction moves down. + // Advance RegionBegin if the first instruction moves down. if (&*RegionBegin == MI) - RegionBegin = llvm::next(RegionBegin); + ++RegionBegin; + + // Update the instruction stream. BB->splice(InsertPos, BB, MI); + + // Update LiveIntervals LIS->handleMove(MI); - // Fix RegionBegin if another instruction moves above the first instruction. + + // Recede RegionBegin if an instruction moves above the first. if (RegionBegin == InsertPos) RegionBegin = MI; } @@ -392,12 +516,114 @@ bool ScheduleDAGMI::checkSchedLimit() { return true; } +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. [begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) +{ + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + + // For convenience remember the end of the liveness region. + LiveRegionEnd = + (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); } + +// Set up the register pressure trackers for the top and bottom scheduled +// regions. +void ScheduleDAGMI::initRegPressure() { + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + DEBUG(RPTracker.getPressure().dump(TRI)); + + // Initialize the live ins and live outs. + TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); + BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); + + // Close one end of the tracker so we can call + // getMaxUpward/DownwardPressureDelta before advancing across any + // instructions. This converts currently live regs into live ins/outs. + TopRPTracker.closeTop(); + BotRPTracker.closeBottom(); + + // Account for liveness generated by the region boundary. + if (LiveRegionEnd != RegionEnd) + BotRPTracker.recede(); + + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + + // Cache the list of excess pressure sets in this region. This will also track + // the max pressure in the scheduled code for these sets. + RegionCriticalPSets.clear(); + std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; + for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) + RegionCriticalPSets.push_back(PressureElement(i, 0)); + } + DEBUG(dbgs() << "Excess PSets: "; + for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + dbgs() << TRI->getRegPressureSetName( + RegionCriticalPSets[i].PSetID) << " "; + dbgs() << "\n"); +} + +// FIXME: When the pressure tracker deals in pressure differences then we won't +// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI:: +updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { + for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { + unsigned ID = RegionCriticalPSets[i].PSetID; + int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; + if ((int)NewMaxPressure[ID] > MaxUnits) + MaxUnits = NewMaxPressure[ID]; + } +} + +// Release all DAG roots for scheduling. +void ScheduleDAGMI::releaseRoots() { + SmallVector<SUnit*, 16> BotRoots; + + for (std::vector<SUnit>::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + BotRoots.push_back(&(*I)); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) + SchedImpl->releaseBottomNode(*I); +} + /// schedule - Called back from MachineScheduler::runOnMachineFunction -/// after setting up the current scheduling region. +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. void ScheduleDAGMI::schedule() { - buildSchedGraph(AA); + // Initialize the register pressure tracker used by buildSchedGraph. + RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Account for liveness generated by the region boundary. + if (LiveRegionEnd != RegionEnd) + RPTracker.recede(); + + // Build the DAG, and compute current register pressure. + buildSchedGraph(AA, &RPTracker); + + // Initialize top/bottom trackers after computing region pressure. + initRegPressure(); - DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -410,22 +636,12 @@ void ScheduleDAGMI::schedule() { releasePredecessors(&ExitSU); // Release all DAG roots for scheduling. - for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end(); - I != E; ++I) { - // A SUnit is ready to top schedule if it has no predecessors. - if (I->Preds.empty()) - SchedImpl->releaseTopNode(&(*I)); - // A SUnit is ready to bottom schedule if it has no successors. - if (I->Succs.empty()) - SchedImpl->releaseBottomNode(&(*I)); - } + releaseRoots(); - CurrentTop = RegionBegin; + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); CurrentBottom = RegionEnd; bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { - DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") - << " Scheduling Instruction:\n"; SU->dump(this)); if (!checkSchedLimit()) break; @@ -435,28 +651,69 @@ void ScheduleDAGMI::schedule() { if (IsTopNode) { assert(SU->isTopReady() && "node still has unscheduled dependencies"); if (&*CurrentTop == MI) - ++CurrentTop; - else + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else { moveInstruction(MI, CurrentTop); + TopRPTracker.setPos(MI); + } + + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + // Release dependent instructions for scheduling.
releaseSuccessors(SU); } else { assert(SU->isBottomReady() && "node still has unscheduled dependencies"); - if (&*llvm::prior(CurrentBottom) == MI) - --CurrentBottom; + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; else { - if (&*CurrentTop == MI) - CurrentTop = llvm::next(CurrentTop); + if (&*CurrentTop == MI) { + CurrentTop = nextIfDebug(++CurrentTop, priorII); + TopRPTracker.setPos(CurrentTop); + } moveInstruction(MI, CurrentBottom); CurrentBottom = MI; } + // Update bottom scheduled pressure. + BotRPTracker.recede(); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + // Release dependent instructions for scheduling. releasePredecessors(SU); } SU->isScheduled = true; + SchedImpl->schedNode(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void ScheduleDAGMI::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == llvm::prior(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = NULL; } //===----------------------------------------------------------------------===// @@ -464,56 +721,603 @@ void ScheduleDAGMI::schedule() { //===----------------------------------------------------------------------===// namespace { +/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience +/// methods for pushing and removing nodes. ReadyQueues are uniquely identified +/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. +class ReadyQueue { + unsigned ID; + std::string Name; + std::vector<SUnit*> Queue; + +public: + ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} + + unsigned getID() const { return ID; } + + StringRef getName() const { return Name; } + + // SU is in this queue if its NodeQueueId is a superset of this ID. + bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } + + bool empty() const { return Queue.empty(); } + + unsigned size() const { return Queue.size(); } + + typedef std::vector<SUnit*>::iterator iterator; + + iterator begin() { return Queue.begin(); } + + iterator end() { return Queue.end(); } + + iterator find(SUnit *SU) { + return std::find(Queue.begin(), Queue.end(), SU); + } + + void push(SUnit *SU) { + Queue.push_back(SU); + SU->NodeQueueId |= ID; + } + + void remove(iterator I) { + (*I)->NodeQueueId &= ~ID; + *I = Queue.back(); + Queue.pop_back(); + } + + void dump() { + dbgs() << Name << ": "; + for (unsigned i = 0, e = Queue.size(); i < e; ++i) + dbgs() << Queue[i]->NodeNum << " "; + dbgs() << "\n"; + } +}; + /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. class ConvergingScheduler : public MachineSchedStrategy { + + /// Store the state used by ConvergingScheduler heuristics, required for the + /// lifetime of one invocation of pickNode().
+ struct SchedCandidate { + // The best SUnit candidate. + SUnit *SU; + + // Register pressure values for the best candidate. + RegPressureDelta RPDelta; + + SchedCandidate(): SU(NULL) {} + }; + /// Represent the type of SchedCandidate found within a single queue. + enum CandResult { + NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure }; + + /// Each scheduling boundary is associated with ready queues. It tracks the + /// current cycle in whichever direction it has moved, and maintains the state + /// of "hazards" and other interlocks at the current cycle. + struct SchedBoundary { + ScheduleDAGMI *DAG; + + ReadyQueue Available; + ReadyQueue Pending; + bool CheckPending; + + ScheduleHazardRecognizer *HazardRec; + + unsigned CurrCycle; + unsigned IssueCount; + + /// MinReadyCycle - Cycle of the soonest available instruction. + unsigned MinReadyCycle; + + // Remember the greatest min operand latency. + unsigned MaxMinLatency; + + /// Pending queues extend the ready queues with the same ID and the + /// PendingFlag set. + SchedBoundary(unsigned ID, const Twine &Name): + DAG(0), Available(ID, Name+".A"), + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), + MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + + ~SchedBoundary() { delete HazardRec; } + + bool isTop() const { + return Available.getID() == ConvergingScheduler::TopQID; + } + + bool checkHazard(SUnit *SU); + + void releaseNode(SUnit *SU, unsigned ReadyCycle); + + void bumpCycle(); + + void bumpNode(SUnit *SU); + + void releasePending(); + + void removeReady(SUnit *SU); + + SUnit *pickOnlyChoice(); + }; + ScheduleDAGMI *DAG; + const TargetRegisterInfo *TRI; - unsigned NumTopReady; - unsigned NumBottomReady; + // State of the top and bottom scheduled instruction boundaries. + SchedBoundary Top; + SchedBoundary Bot; public: - virtual void initialize(ScheduleDAGMI *dag) { - DAG = dag; + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ConvergingScheduler(): + DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initialize(ScheduleDAGMI *dag); + + virtual SUnit *pickNode(bool &IsTopNode); + + virtual void schedNode(SUnit *SU, bool IsTopNode); + + virtual void releaseTopNode(SUnit *SU); + + virtual void releaseBottomNode(SUnit *SU); + +protected: + SUnit *pickNodeBidirectional(bool &IsTopNode); - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); + CandResult pickNodeFromQueue(ReadyQueue &Q, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); +#ifndef NDEBUG + void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, + PressureElement P = PressureElement()); +#endif +}; +} // namespace + +void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { + DAG = dag; + TRI = DAG->TRI; + Top.DAG = dag; + Bot.DAG = dag; + + // Initialize the HazardRecognizers.
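+  // (Editor's note, inferred from the surrounding code: each SchedBoundary
+  // owns its recognizer and deletes it in ~SchedBoundary. When the target
+  // supplies no instruction itineraries, the recognizer appears to stay
+  // disabled, i.e. isEnabled() is false, so checkHazard() below falls back to
+  // the IssueCount/IssueWidth counter test.)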
+ const TargetMachine &TM = DAG->MF.getTarget(); + const InstrItineraryData *Itin = TM.getInstrItineraryData(); + Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned Latency = + DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; } + Top.releaseNode(SU, SU->TopReadyCycle); +} - virtual SUnit *pickNode(bool &IsTopNode) { - if (DAG->top() == DAG->bottom()) - return NULL; +void ConvergingScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; - // As an initial placeholder heuristic, schedule in the direction that has - // the fewest choices. - SUnit *SU; - if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) { - SU = DAG->getSUnit(DAG->top()); - IsTopNode = true; + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned Latency = + DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group? +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) + return true; + + return false; +} + +void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingScheduler::SchedBoundary::bumpCycle() { + unsigned Width = DAG->getIssueWidth(); + IssueCount = (IssueCount <= Width) ?
0 : IssueCount - Width; + + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } + else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); } - else { - SU = DAG->getSUnit(llvm::prior(DAG->bottom())); - IsTopNode = false; + } + CheckPending = true; + + DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); } - if (SU->isTopReady()) { - assert(NumTopReady > 0 && "bad ready count"); - --NumTopReady; + HazardRec->EmitInstruction(SU); + } + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += DAG->getNumMicroOps(SU->getInstr()); + if (IssueCount >= DAG->getIssueWidth()) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } +} + +/// Release pending ready nodes into the available queue. This makes them +/// visible to heuristics. +void ConvergingScheduler::SchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL.
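+///
+/// A usage sketch (editor's illustration, mirroring pickNodeBidirectional
+/// below): callers try this cheap single-choice path before running the
+/// pressure heuristics:
+///
+///   if (SUnit *SU = Bot.pickOnlyChoice()) {
+///     IsTopNode = false;
+///     return SU;
+///   }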
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return NULL; +} + +#ifndef NDEBUG +void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q, + SUnit *SU, PressureElement P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease + << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +/// Return true if the LHS reg pressure effect is more desirable than RHS from +/// a scheduling standpoint. +static bool compareRPDelta(const RegPressureDelta &LHS, + const RegPressureDelta &RHS) { + // Compare each component of pressure in decreasing order of importance + // without checking if any are valid. Invalid PressureElements are assumed to + // have UnitIncrease==0, so are neutral. + + // Avoid exceeding the target's limit. + if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) + return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; + + // Avoid increasing the max critical pressure in the scheduled region. + if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) + return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; + + // Avoid increasing the max pressure of the entire region. + if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) + return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; + + return false; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingScheduler::CandResult ConvergingScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = NodeOrder; + continue; + } + // Avoid exceeding the target's limit. + if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) { + DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = SingleExcess; + continue; + } + if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease) + continue; + if (FoundCandidate == SingleExcess) + FoundCandidate = MultiPressure; + + // Avoid increasing the max critical pressure in the scheduled region.
+ if (RPDelta.CriticalMax.UnitIncrease + < Candidate.RPDelta.CriticalMax.UnitIncrease) { + DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = SingleCritical; + continue; + } + if (RPDelta.CriticalMax.UnitIncrease + > Candidate.RPDelta.CriticalMax.UnitIncrease) + continue; + if (FoundCandidate == SingleCritical) + FoundCandidate = MultiPressure; + + // Avoid increasing the max pressure of the entire region. + if (RPDelta.CurrentMax.UnitIncrease + < Candidate.RPDelta.CurrentMax.UnitIncrease) { + DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = SingleMax; + continue; } - if (SU->isBottomReady()) { - assert(NumBottomReady > 0 && "bad ready count"); - --NumBottomReady; + if (RPDelta.CurrentMax.UnitIncrease + > Candidate.RPDelta.CurrentMax.UnitIncrease) + continue; + if (FoundCandidate == SingleMax) + FoundCandidate = MultiPressure; + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) + || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + DEBUG(traceCandidate("NCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = NodeOrder; + } + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = pickNodeFromQueue(Top.Available, + DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure, pick it. + if (BotResult == SingleMax) { + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + IsTopNode = true; + return TopCand.SU; + } + // Check for a salient pressure difference and pick the best from either side.
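+  // (Editor's illustration: with TopCand = {Excess +0, CriticalMax +1} and
+  // BotCand = {Excess +0, CriticalMax +2}, compareRPDelta returns true and
+  // the top candidate wins; if every component ties, it returns false and we
+  // fall through to the bottom candidate below.)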
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + IsTopNode = false; + return BotCand.SU; +} - virtual void releaseTopNode(SUnit *SU) { - ++NumTopReady; +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return NULL; } - virtual void releaseBottomNode(SUnit *SU) { - ++NumBottomReady; + SUnit *SU; + if (ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; } -}; -} // namespace + else if (ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } + else { + SU = pickNodeBidirectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; + SU->dump(DAG)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update +/// its state based on the current cycle before MachineSchedStrategy does. +void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + SU->TopReadyCycle = Top.CurrCycle; + Top.bumpNode(SU); + } + else { + SU->BotReadyCycle = Bot.CurrCycle; + Bot.bumpNode(SU); + } +} /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. @@ -592,6 +1396,8 @@ public: return SU; } + virtual void schedNode(SUnit *SU, bool IsTopNode) {} + virtual void releaseTopNode(SUnit *SU) { TopQ.push(SU); } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1ce546b..bc383cb 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -99,6 +99,16 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; + + // SuccessorSorter - Sort successors according to their loop depth. + struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + // We give successors with smaller loop depth higher priority.
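+  // Editor's example (not in the original change): if MBB branches to S1 at
+  // loop depth 2 and S2 at loop depth 0, the stable_sort below visits S2
+  // first, preferring to sink out of loops; blocks of equal depth keep their
+  // original successor order.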
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..1a3aa60 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1153 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + ItinData = MF->getTarget().getInstrItineraryData(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + BlockInfo.resize(MF->getNumBlockIDs()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. 
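+/// The result is cached in BlockInfo, so repeated queries are cheap. A typical
+/// use (editor's sketch, matching MinInstrCountEnsemble::pickTracePred below):
+///
+///   unsigned CurCount = MTM.getResources(MBB)->InstrCount;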
+const MachineTraceMetrics::FixedBlockInfo* +MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { + assert(MBB && "No basic block"); + FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()]; + if (FBI->hasResources()) + return FBI; + + // Compute resource usage in the block. + // FIXME: Compute per-functional unit counts. + FBI->HasCalls = false; + unsigned InstrCount = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isTransient()) + continue; + ++InstrCount; + if (MI->isCall()) + FBI->HasCalls = true; + } + FBI->InstrCount = InstrCount; + return FBI; +} + +//===----------------------------------------------------------------------===// +// Ensemble utility functions +//===----------------------------------------------------------------------===// + +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { + BlockInfo.resize(MTM.BlockInfo.size()); +} + +// Virtual destructor serves as an anchor. +MachineTraceMetrics::Ensemble::~Ensemble() {} + +const MachineLoop* +MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { + return MTM.Loops->getLoopFor(MBB); +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace above MBB. +void MachineTraceMetrics::Ensemble:: +computeDepthResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources from trace above. The top block is simple. + if (!TBI->Pred) { + TBI->InstrDepth = 0; + TBI->Head = MBB->getNumber(); + return; + } + + // Compute from the block above. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); + const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); + TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; + TBI->Head = PredTBI->Head; +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace below MBB. +void MachineTraceMetrics::Ensemble:: +computeHeightResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources for the current block. + TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + + // The trace tail is done. + if (!TBI->Succ) { + TBI->Tail = MBB->getNumber(); + return; + } + + // Compute from the block below. A post-order traversal ensures the + // successor is always computed first. + TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); + TBI->InstrHeight += SuccTBI->InstrHeight; + TBI->Tail = SuccTBI->Tail; +} + +// Check if depth resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getDepthResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidDepth() ? TBI : 0; +} + +// Check if height resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated.
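+// Both accessors are used the same way; callers must handle the null result
+// (editor's sketch, matching MinInstrCountEnsemble::pickTracePred below):
+//
+//   const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+//     getDepthResources(Pred);
+//   if (!PredTBI) // Ignore cycles that aren't natural loops.
+//     continue;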
+const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getHeightResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidHeight() ? TBI : 0; +} + +//===----------------------------------------------------------------------===// +// Trace Selection Strategies +//===----------------------------------------------------------------------===// +// +// A trace selection strategy is implemented as a sub-class of Ensemble. The +// trace through a block B is computed by two DFS traversals of the CFG +// starting from B. One upwards, and one downwards. During the upwards DFS, +// pickTracePred() is called on the post-ordered blocks. During the downwards +// DFS, pickTraceSucc() is called in post-order. +// + +// We never allow traces that leave loops, but we do allow traces to enter +// nested loops. We also never allow traces to contain back-edges. +// +// This means that a loop header can never appear above the center block of a +// trace, except as the trace head. Below the center block, loop exiting edges +// are banned. +// +// Return true if an edge from the From loop to the To loop is leaving a loop. +// Either of To and From can be null. +static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { + return From && !From->contains(To); +} + +// MinInstrCountEnsemble - Pick the trace that executes the least number of +// instructions. +namespace { +class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { + const char *getName() const { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + +public: + MinInstrCountEnsemble(MachineTraceMetrics *mtm) + : MachineTraceMetrics::Ensemble(mtm) {} +}; +} + +// Select the preferred predecessor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + // Don't leave loops, and never follow back-edges. + if (CurLoop && MBB == CurLoop->getHeader()) + return 0; + unsigned CurCount = MTM.getResources(MBB)->InstrCount; + const MachineBasicBlock *Best = 0; + unsigned BestDepth = 0; + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + const MachineBasicBlock *Pred = *I; + const MachineTraceMetrics::TraceBlockInfo *PredTBI = + getDepthResources(Pred); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; + // Pick the predecessor that would give this block the smallest InstrDepth. + unsigned Depth = PredTBI->InstrDepth + CurCount; + if (!Best || Depth < BestDepth) + Best = Pred, BestDepth = Depth; + } + return Best; +} + +// Select the preferred successor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { + if (MBB->succ_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + const MachineBasicBlock *Best = 0; + unsigned BestHeight = 0; + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // Don't consider back-edges. + if (CurLoop && Succ == CurLoop->getHeader()) + continue; + // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. + switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. 
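+/// In outline (editor's summary of the code below), the trace is grown in two
+/// bounded passes around MBB: an upwards inverse-post-order walk that picks
+/// TBI.Pred and computes depth resources for every block reached, then a
+/// downwards post-order walk that picks TBI.Succ and computes height
+/// resources. LoopBounds keeps both walks inside the current loop and off the
+/// back-edges.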
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. + TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above BadMBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a successor of *I. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below BadMBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a predecessor of *I. + assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. + for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands.
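+// Editor's example (hypothetical opcode, not from this patch): in SSA form an
+// instruction like %vreg5 = ADD %vreg1, %vreg2 produces one DataDep per
+// virtual register read, each resolved to its unique def via MRI->def_begin().
+// Physreg operands only set the returned flag here; they are resolved
+// separately by updatePhysDepsDownwards/updatePhysDepsUpwards.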
+static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. +static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. 
The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. +unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo &DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace.
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, + Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << *UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO->readsReg()) + ReadOps.push_back(MO.getOperandNo()); + if (!MO->isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. + if (I->MI) + DepHeight += TII->computeOperandLatency(ItinData, + MI, MO.getOperandNo(), + I->MI, I->Op); + else + // No UseMI. Just use the MI latency instead. + DepHeight += TII->getInstrLatency(ItinData, MI); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); + + // Update Heights[DefMI] to be the maximum height seen. 
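+  // (Editor's note: insert() returns the existing entry when DefMI is already
+  // a key, so e.g. with Heights[DefMI] == 7 and UseHeight == 9 the entry is
+  // raised to 9 below. The New flag tells the caller whether DefMI was seen
+  // for the first time, i.e. whether addLiveIns() still needs to run for it.)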
+ MIHeightMap::iterator I; + bool New; + tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. + if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists +/// of all the blocks in Trace. Stop when reaching the block that contains +/// DefMI. +void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg LI = TBI.LiveIns[i]; + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. + SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + // Get dependencies from PHIs in the trace successor. + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. 
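The height computation is the mirror image of the depth computation: visiting a use whose required height is H pushes Heights[Def] = max(Heights[Def], H + latency(Def, Use)) for every def feeding it, and seeding loop-header PHIs with H = 0 makes the loop-carried chain span exactly one iteration. In sketch form, with hypothetical locals:

    unsigned H = Heights.lookup(UseMI);   // height required by the trace below
    unsigned &DefH = Heights[DefMI];
    DefH = std::max(DefH, H + Latency);   // push the def's height upwards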
+ if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *PHI = I; + Deps.clear(); + getPHIDeps(PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps.front().DefMI, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.ItinData, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (unsigned i = 0, e = Deps.size(); i != e; ++i) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps[i].DefMI, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg &LIR = TBI.LiveIns[i]; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + // FIXME: Check cache tags, recompute as needed. 
+ computeTrace(MBB); + computeInstrDepths(MBB); + computeInstrHeights(MBB); + return Trace(*this, BlockInfo[MBB->getNumber()]); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, + Dep.DefMI, Dep.DefOp, + PHI, Dep.UseOp, + /* FindMin = */ false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // For now, we compute the resource depth from instruction count / issue + // width. Eventually, we should compute resource depth per functional unit + // and return the max. + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. 
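A worked example with assumed numbers: a trace with InstrDepth + InstrHeight = 14 instructions and a schedule model reporting IssueWidth = 2 has a resource length of 14 / 2 = 7 cycles; without a schedule model the divisor is effectively 1 and the estimate is 14 cycles.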
+ return Instrs; +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h new file mode 100644 index 0000000..c5b86f3 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -0,0 +1,341 @@ +//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the MachineTraceMetrics analysis pass +// that estimates CPU resource usage and critical data dependency paths through +// preferred traces. This is useful for super-scalar CPUs where execution speed +// can be limited both by data dependencies and by limited execution resources. +// +// Out-of-order CPUs will often be executing instructions from multiple basic +// blocks at the same time. This makes it difficult to estimate the resource +// usage accurately in a single basic block. Resources can be estimated better +// by looking at a trace through the current basic block. +// +// For every block, the MachineTraceMetrics pass will pick a preferred trace +// that passes through the block. The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. 
This means that +// if the traces through basic blocks A and B ever cross when moving away from +// A and B, they never diverge again. This applies in both directions - If the +// traces meet above A and B, they won't diverge when going further back. +// +// Traces tend to align with loops. The trace through a block in an inner loop +// will begin at the loop entry block and end at a back edge. If there are +// nested loops, the trace may begin and end at those instead. +// +// For each trace, we compute the critical path length, which is the number of +// cycles required to execute the trace when execution is limited by data +// dependencies only. We also compute the resource height, which is the number +// of cycles required to execute all instructions in the trace when ignoring +// data dependencies. +// +// Every instruction in the current block has a slack - the number of cycles +// execution of the instruction can be delayed without extending the critical +// path. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H +#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class InstrItineraryData; +class MachineBasicBlock; +class MachineInstr; +class MachineLoop; +class MachineLoopInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class raw_ostream; + +class MachineTraceMetrics : public MachineFunctionPass { + const MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *ItinData; + const MachineRegisterInfo *MRI; + const MachineLoopInfo *Loops; + +public: + class Ensemble; + class Trace; + static char ID; + MachineTraceMetrics(); + void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + void releaseMemory(); + void verifyAnalysis() const; + + friend class Ensemble; + friend class Trace; + + /// Per-basic block information that doesn't depend on the trace through the + /// block. + struct FixedBlockInfo { + /// The number of non-trivial instructions in the block. + /// Doesn't count PHI and COPY instructions that are likely to be removed. + unsigned InstrCount; + + /// True when the block contains calls. + bool HasCalls; + + FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} + + /// Returns true when resource information for this block has been computed. + bool hasResources() const { return InstrCount != ~0u; } + + /// Invalidate resource information. + void invalidate() { InstrCount = ~0u; } + }; + + /// Get the fixed resource information about MBB. Compute it on demand. + const FixedBlockInfo *getResources(const MachineBasicBlock*); + + /// A virtual register or regunit required by a basic block or its trace + /// successors. + struct LiveInReg { + /// The virtual register required, or a register unit. + unsigned Reg; + + /// For virtual registers: Minimum height of the defining instruction. + /// For regunits: Height of the highest user in the trace. + unsigned Height; + + LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} + }; + + /// Per-basic block information that relates to a specific trace through the + /// block. Convergent traces means that only one of these is required per + /// block in a trace ensemble. + struct TraceBlockInfo { + /// Trace predecessor, or NULL for the first block in the trace. 
+    /// Valid when hasValidDepth().
+    const MachineBasicBlock *Pred;
+
+    /// Trace successor, or NULL for the last block in the trace.
+    /// Valid when hasValidHeight().
+    const MachineBasicBlock *Succ;
+
+    /// The block number of the head of the trace. (When hasValidDepth()).
+    unsigned Head;
+
+    /// The block number of the tail of the trace. (When hasValidHeight()).
+    unsigned Tail;
+
+    /// Accumulated number of instructions in the trace above this block.
+    /// Does not include instructions in this block.
+    unsigned InstrDepth;
+
+    /// Accumulated number of instructions in the trace below this block.
+    /// Includes instructions in this block.
+    unsigned InstrHeight;
+
+    TraceBlockInfo() :
+      Pred(0), Succ(0),
+      InstrDepth(~0u), InstrHeight(~0u),
+      HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+    /// Returns true if the depth resources have been computed from the trace
+    /// above this block.
+    bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+    /// Returns true if the height resources have been computed from the trace
+    /// below this block.
+    bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+    /// Invalidate depth resources when some block above this one has changed.
+    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+    /// Invalidate height resources when a block below this one has changed.
+    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+    // Data-dependency-related information. Per-instruction depth and height
+    // are computed from data dependencies in the current trace, using
+    // itinerary data.
+
+    /// Instruction depths have been computed. This implies hasValidDepth().
+    bool HasValidInstrDepths;
+
+    /// Instruction heights have been computed. This implies hasValidHeight().
+    bool HasValidInstrHeights;
+
+    /// Critical path length. This is the number of cycles in the longest data
+    /// dependency chain through the trace. This is only valid when both
+    /// HasValidInstrDepths and HasValidInstrHeights are set.
+    unsigned CriticalPath;
+
+    /// Live-in registers. These registers are defined above the current block
+    /// and used by this block or a block below it.
+    /// This does not include PHI uses in the current block, but it does
+    /// include PHI uses in deeper blocks.
+    SmallVector<LiveInReg, 4> LiveIns;
+
+    void print(raw_ostream&) const;
+  };
+
+  /// InstrCycles represents the cycle height and depth of an instruction in a
+  /// trace.
+  struct InstrCycles {
+    /// Earliest issue cycle as determined by data dependencies and instruction
+    /// latencies from the beginning of the trace. Data dependencies from
+    /// before the trace are not included.
+    unsigned Depth;
+
+    /// Minimum number of cycles from when this instruction is issued to the
+    /// end of the trace, as determined by data dependencies and instruction
+    /// latencies.
+    unsigned Height;
+  };
+
+  /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block once. The Trace class serves as a
+  /// handle to internal cached data structures.
+  class Trace {
+    Ensemble &TE;
+    TraceBlockInfo &TBI;
+
+    unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+  public:
+    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+    void print(raw_ostream&) const;
+
+    /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const { + return TBI.InstrDepth + TBI.InstrHeight; + } + + /// Return the resource depth of the top/bottom of the trace center block. + /// This is the number of cycles required to execute all instructions from + /// the trace head to the trace center block. The resource depth only + /// considers execution resources, it ignores data dependencies. + /// When Bottom is set, instructions in the trace center block are included. + unsigned getResourceDepth(bool Bottom) const; + + /// Return the resource length of the trace. This is the number of cycles + /// required to execute the instructions in the trace if they were all + /// independent, exposing the maximum instruction-level parallelism. + /// + /// Any blocks in Extrablocks are included as if they were part of the + /// trace. + unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks = + ArrayRef<const MachineBasicBlock*>()) const; + + /// Return the length of the (data dependency) critical path through the + /// trace. + unsigned getCriticalPath() const { return TBI.CriticalPath; } + + /// Return the depth and height of MI. The depth is only valid for + /// instructions in or above the trace center block. The height is only + /// valid for instructions in or below the trace center block. + InstrCycles getInstrCycles(const MachineInstr *MI) const { + return TE.Cycles.lookup(MI); + } + + /// Return the slack of MI. This is the number of cycles MI can be delayed + /// before the critical path becomes longer. + /// MI must be an instruction in the trace center block. + unsigned getInstrSlack(const MachineInstr *MI) const; + + /// Return the Depth of a PHI instruction in a trace center block successor. + /// The PHI does not have to be part of the trace. + unsigned getPHIDepth(const MachineInstr *PHI) const; + }; + + /// A trace ensemble is a collection of traces selected using the same + /// strategy, for example 'minimum resource height'. There is one trace for + /// every block in the function. + class Ensemble { + SmallVector<TraceBlockInfo, 4> BlockInfo; + DenseMap<const MachineInstr*, InstrCycles> Cycles; + friend class Trace; + + void computeTrace(const MachineBasicBlock*); + void computeDepthResources(const MachineBasicBlock*); + void computeHeightResources(const MachineBasicBlock*); + unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); + void computeInstrDepths(const MachineBasicBlock*); + void computeInstrHeights(const MachineBasicBlock*); + void addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace); + + protected: + MachineTraceMetrics &MTM; + virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; + virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; + explicit Ensemble(MachineTraceMetrics*); + const MachineLoop *getLoopFor(const MachineBasicBlock*) const; + const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; + const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + + public: + virtual ~Ensemble(); + virtual const char *getName() const =0; + void print(raw_ostream&) const; + void invalidate(const MachineBasicBlock *MBB); + void verify() const; + + /// Get the trace that passes through MBB. + /// The trace is computed on demand. + Trace getTrace(const MachineBasicBlock *MBB); + }; + + /// Strategies for selecting traces. + enum Strategy { + /// Select the trace through a block that has the fewest instructions. 
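A sketch of a possible client of this interface, using the TS_MinInstrCount strategy declared just below and only names from this header; the surrounding pass boilerplate (addRequired, etc.) is assumed:

    MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
    MachineTraceMetrics::Ensemble *E =
        MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
    MachineTraceMetrics::Trace T = E->getTrace(MBB);
    // MI must be in MBB, the trace center block.
    if (T.getInstrSlack(MI) == 0) {
      // MI is on the critical path: Depth + Height == CriticalPath.
    }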
+ TS_MinInstrCount, + + TS_NumStrategies + }; + + /// Get the trace ensemble representing the given trace selection strategy. + /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, + /// and valid for the lifetime of the analysis pass. + Ensemble *getEnsemble(Strategy); + + /// Invalidate cached information about MBB. This must be called *before* MBB + /// is erased, or the CFG is otherwise changed. + /// + /// This invalidates per-block information about resource usage for MBB only, + /// and it invalidates per-trace information for any trace that passes + /// through MBB. + /// + /// Call Ensemble::getTrace() again to update any trace handles. + void invalidate(const MachineBasicBlock *MBB); + +private: + // One entry per basic block, indexed by block number. + SmallVector<FixedBlockInfo, 4> BlockInfo; + + // One ensemble per strategy. + Ensemble* Ensembles[TS_NumStrategies]; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Trace &Tr) { + Tr.print(OS); + return OS; +} + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Ensemble &En) { + En.print(OS); + return OS; +} +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 74ba94d..172402e 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -89,8 +89,8 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) - RV.push_back(*R); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RV.push_back(*SubRegs); } struct BBInfo { @@ -191,9 +191,11 @@ namespace { void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); + void visitMachineBundleBefore(const MachineInstr *MI); void visitMachineInstrBefore(const MachineInstr *MI); void visitMachineOperand(const MachineOperand *MO, unsigned MONum); void visitMachineInstrAfter(const MachineInstr *MI); + void visitMachineBundleAfter(const MachineInstr *MI); void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); @@ -201,6 +203,10 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); @@ -210,6 +216,10 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); + void verifyLiveIntervalSegment(const LiveInterval&, + LiveInterval::const_iterator); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -288,6 +298,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); + // Keep track of the current bundle header. 
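For reference, a bundle header is the only instruction of a bundle that is not marked as inside the bundle, and a stand-alone instruction is simply a one-instruction bundle, so the header test used below reduces to:

    bool IsBundleHeader = !MBBI->isInsideBundle();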
+ const MachineInstr *CurBundle = 0; for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { @@ -295,15 +307,21 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { *OS << "Instruction: " << *MBBI; continue; } - // Skip BUNDLE instruction for now. FIXME: We should add code to verify - // the BUNDLE's specifically. - if (MBBI->isBundle()) - continue; + // Is this a bundle header? + if (!MBBI->isInsideBundle()) { + if (CurBundle) + visitMachineBundleAfter(CurBundle); + CurBundle = MBBI; + visitMachineBundleBefore(CurBundle); + } else if (!CurBundle) + report("No bundle header", MBBI); visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); visitMachineInstrAfter(MBBI); } + if (CurBundle) + visitMachineBundleAfter(CurBundle); visitMachineBasicBlockAfter(MFI); } visitMachineFunctionAfter(); @@ -340,9 +358,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getName() - << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")"; + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -367,6 +385,28 @@ void MachineVerifier::report(const char *msg, *OS << "\n"; } +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -384,10 +424,10 @@ void MachineVerifier::visitMachineFunctionBefore() { // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; Reg = regsReserved.find_next(Reg)) { - for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { // FIXME: This should probably be: - // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); - regsReserved.set(*Sub); + // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register"); + regsReserved.set(*SubRegs); } } @@ -466,8 +506,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().isBarrier() && - !TII->isPredicated(&MBB->back())) { + if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() && + !TII->isPredicated(getBundleStart(&MBB->back()))) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -487,10 +527,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const 
MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().isBarrier()) { + } else if (!getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -510,10 +550,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().isBarrier()) { + } else if (getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -530,10 +570,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().isBarrier()) { + } else if (!getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -554,8 +594,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { continue; } regsLive.insert(*I); - for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++) - regsLive.insert(*R); + for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs) + regsLive.insert(*SubRegs); } regsLiveInButUnused = regsLive; @@ -564,8 +604,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { regsLive.insert(I); - for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++) - regsLive.insert(*R); + for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs) + regsLive.insert(*SubRegs); } regsKilled.clear(); @@ -575,6 +615,30 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { lastIndex = Indexes->getMBBStartIdx(MBB); } +// This function gets called for all bundle headers, including normal +// stand-alone unbundled instructions. +void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { + if (Indexes && Indexes->hasIndex(MI)) { + SlotIndex idx = Indexes->getInstructionIndex(MI); + if (!(idx > lastIndex)) { + report("Instruction index out of order", MI); + *OS << "Last instruction was at " << lastIndex << '\n'; + } + lastIndex = idx; + } + + // Ensure non-terminators don't follow terminators. + // Ignore predicated terminators formed by if conversion. + // FIXME: If conversion shouldn't need to violate this rule. 
+ if (MI->isTerminator() && !TII->isPredicated(MI)) { + if (!FirstTerminator) + FirstTerminator = MI; + } else if (FirstTerminator) { + report("Non-terminator instruction after the first terminator", MI); + *OS << "First terminator was:\t" << *FirstTerminator; + } +} + void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { @@ -608,17 +672,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - // Ensure non-terminators don't follow terminators. - // Ignore predicated terminators formed by if conversion. - // FIXME: If conversion shouldn't need to violate this rule. - if (MI->isTerminator() && !TII->isPredicated(MI)) { - if (!FirstTerminator) - FirstTerminator = MI; - } else if (FirstTerminator) { - report("Non-terminator instruction after the first terminator", MI); - *OS << "First terminator was:\t" << *FirstTerminator; - } - StringRef ErrorInfo; if (!TII->verifyInstruction(MI, ErrorInfo)) report(ErrorInfo.data(), MI); @@ -634,7 +687,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MONum < MCID.getNumDefs()) { if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); - else if (!MO->isDef()) + else if (!MO->isDef() && !MCOI.isOptionalDef()) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); @@ -662,6 +715,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify two-address constraints after leaving SSA form. + unsigned DefIdx; + if (!MRI->isSSA() && MO->isUse() && + MI->isRegTiedToDefOperand(MONum, &DefIdx) && + Reg != MI->getOperand(DefIdx).getReg()) + report("Two-address instruction operands must be identical", MO, MONum); // Check register classes. if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -672,7 +731,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Illegal subregister index for physical register", MO, MONum); return; } - if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(Reg) << " is not a " @@ -698,7 +758,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; } } - if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(RC); @@ -761,20 +822,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (MO->readsReg()) { regsLiveInButUnused.erase(Reg); - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. 
- unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) + if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. @@ -786,23 +834,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { + LiveRangeQuery LRQ(*LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LI << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + } + } } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveRangeQuery LRQ(LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); } - } else { - report("Virtual register has no Live interval", MO, MONum); } } @@ -812,6 +881,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Reserved registers may be used even when 'dead'. if (!isReserved(Reg)) report("Using an undefined physical register", MO, MONum); + } else if (MRI->def_empty(Reg)) { + report("Reading virtual register without a def", MO, MONum); } else { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; // We don't know which virtual registers are live in, so only complain @@ -841,12 +912,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Check LiveInts for a live range, but only for virtual registers. 
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); + DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + if (VNI->def != DefIdx) { report("Inconsistent valno->def", MO, MONum); *OS << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; @@ -863,6 +935,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { +} + +// This function gets called after visiting all instructions in a bundle. The +// argument points to the bundle header. +// Normal stand-alone instructions are also considered 'bundles', and this +// function is called for all of them. +void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); @@ -876,15 +955,6 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); - - if (Indexes && Indexes->hasIndex(MI)) { - SlotIndex idx = Indexes->getInstructionIndex(MI); - if (!(idx > lastIndex)) { - report("Instruction index out of order", MI); - *OS << "Last instruction was at " << lastIndex << '\n'; - } - lastIndex = idx; - } } void @@ -1025,7 +1095,21 @@ void MachineVerifier::visitMachineFunctionAfter() { // Now check liveness info if available calcRegsRequired(); - if (MRI->isSSA() && !MF->empty()) { + // Check for killed virtual registers that should be live out. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + if (MInfo.regsKilled.count(*I)) { + report("Virtual register killed in block, but needed live out.", MFI); + *OS << "Virtual register " << PrintReg(*I) + << " is used after the block.\n"; + } + } + + if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; for (RegSet::iterator I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; @@ -1069,292 +1153,298 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); - for (LiveIntervals::const_iterator LVI = LiveInts->begin(), - LVE = LiveInts->end(); LVI != LVE; ++LVI) { - const LiveInterval &LI = *LVI->second; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); // Spilling and splitting may leave unused registers around. Skip them. - if (MRI->use_empty(LI.reg)) + if (MRI->reg_nodbg_empty(Reg)) continue; - // Physical registers have much weirdness going on, mostly from coalescing. - // We should probably fix it, but for now just ignore them. 
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg)) + if (!LiveInts->hasInterval(Reg)) { + report("Missing live interval for virtual register", MF); + *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; continue; + } - assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); + const LiveInterval &LI = LiveInts->getInterval(Reg); + assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) { - VNInfo *VNI = *I; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + // Verify all the cached regunit intervals. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} - if (!DefVNI) { - if (!VNI->isUnused()) { - report("Valno not live at def and not marked unused", MF); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } - continue; - } +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; - if (VNI->isUnused()) - continue; + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (DefVNI != VNI) { - report("Live range at def has different valno", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; - continue; - } + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); - if (!MBB) { - report("Invalid definition index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } - if (VNI->isPHIDef()) { - if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() - << " in " << LI << '\n'; - } - } else { - // Non-PHI def. 
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); - if (!MI) { - report("No instruction at def index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->regsOverlap(LI.reg, MOI->getReg())) - continue; - } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. - if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF); - I->print(*OS); - *OS << " has a valno not in " << LI << '\n'; - } + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
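The rule rests on the slot layout: each instruction index carries four ordered slots, Block < EarlyClobber < Register < Dead. A sketch using only calls that already appear in this diff:

    SlotIndex Idx  = LiveInts->getInstructionIndex(MI);
    SlotIndex EC   = Idx.getRegSlot(true);   // early-clobber def (use) slot
    SlotIndex Def  = Idx.getRegSlot(false);  // normal def slot
    SlotIndex Dead = Idx.getDeadSlot();      // dead-def slot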
+ if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} - if (VNI->isUnused()) { - report("Live range valno is marked unused", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - } +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); - if (!MBB) { - report("Bad start of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } - SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } - const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); - if (!EndMBB) { - report("Bad end of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LI); + *OS << *I << '\n'; + } - // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) - continue; + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. 
+ // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) continue; - } + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } - // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", MI); + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); I->print(*OS); *OS << " in " << LI << '\n'; } - - if (I->end.isDead()) { - // Segment ends on the dead slot. - // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - - // A live segment can only end at an early-clobber slot if it is being - // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == E || (I+1)->start != I->end) { - report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; } + } + } - // The following checks only apply to virtual registers. Physreg liveness - // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - bool hasRead = false; - bool hasDeadDef = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) - continue; - if (MOI->readsReg()) - hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; - } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { - if (!hasRead) { - report("Instruction ending live range doesn't read the register", - MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - } + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. 
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } - // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { - // Not live-in to any blocks. - if (MBB == EndMBB) - continue; - // Skip this block. - ++MFI; + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; } - for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); - // We don't know how to track physregs into a landing pad. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && - MFI->isLandingPad()) { - if (&*MFI == EndMBB) - break; - ++MFI; - continue; - } - // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - - if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) - continue; - - if (!PVNI) { - report("Register not marked live out of predecessor", *PI); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << " in " << LI << '\n'; - continue; - } - if (PVNI != VNI) { - report("Different value live out of predecessor", *PI); - *OS << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n'; - } - } - if (&*MFI == EndMBB) - break; - ++MFI; + // Only PHI-defs can take different predecessor values. + if (!IsPHI && PVNI != VNI) { + report("Different value live out of predecessor", *PI, LI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; } } + if (&*MFI == EndMBB) + break; + ++MFI; + } +} - // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF); - *OS << NumComp << " components in " << LI << '\n'; - for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; - } +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) + verifyLiveIntervalValue(LI, *I); + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) + verifyLiveIntervalSegment(LI, I); + + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; } } } } - diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 0ed4c34..e6e23da 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -171,23 +171,30 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, return true; } +/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs. +/// This includes registers with no defs. +static bool isImplicitlyDefined(unsigned VirtReg, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg), + DE = MRI->def_end(); DI != DE; ++DI) + if (!DI->isImplicitDef()) + return false; + return true; +} + /// isSourceDefinedByImplicitDef - Return true if all sources of the phi node /// are implicit_def's. static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, const MachineRegisterInfo *MRI) { - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - unsigned SrcReg = MPhi->getOperand(i).getReg(); - const MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (!DefMI || !DefMI->isImplicitDef()) + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) + if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI)) return false; - } return true; } - /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, -/// under the assuption that it needs to be lowered in a way that supports +/// under the assumption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. /// @@ -287,7 +294,8 @@ void PHIElimination::LowerAtomicPHINode( for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); - + bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || + isImplicitlyDefined(SrcReg, MRI); assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); @@ -295,14 +303,6 @@ void PHIElimination::LowerAtomicPHINode( // path the PHI. 
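// Note on the isImplicitlyDefined helper introduced above: a virtual
// register with no defs at all also satisfies the predicate (the
// def_iterator loop body never executes), which is the "registers with no
// defs" case its comment calls out and exactly what
// isSourceDefinedByImplicitDef relies on for undef PHI inputs.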
 MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
 
-    // If source is defined by an implicit def, there is no need to insert a
-    // copy.
-    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
-    if (DefMI->isImplicitDef()) {
-      ImpDefs.insert(DefMI);
-      continue;
-    }
-
     // Check to make sure we haven't already emitted the copy for this block.
     // This can happen because PHI nodes may have multiple entries for the same
     // basic block.
@@ -315,12 +315,27 @@ void PHIElimination::LowerAtomicPHINode(
       findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
 
     // Insert the copy.
-    if (!reusedIncoming && IncomingReg)
-      BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
-              TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
+    if (!reusedIncoming && IncomingReg) {
+      if (SrcUndef) {
+        // The source register is undefined, so there is no need for a real
+        // COPY, but we still need to ensure joint dominance by defs.
+        // Insert an IMPLICIT_DEF instruction.
+        BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+                TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg);
+
+        // Clean up the old implicit-def, if there even was one.
+        if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+          if (DefMI->isImplicitDef())
+            ImpDefs.insert(DefMI);
+      } else {
+        BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+                TII->get(TargetOpcode::COPY), IncomingReg)
+          .addReg(SrcReg, 0, SrcSubReg);
+      }
+    }
 
     // Now update live variable information if we have it.  Otherwise we're done
-    if (!LV) continue;
+    if (SrcUndef || !LV) continue;
 
     // We want to be able to insert a kill of the register if this PHI (aka, the
     // copy we just inserted) is the last use of the source value.  Live
@@ -340,39 +355,35 @@ void PHIElimination::LowerAtomicPHINode(
     // add a kill marker in this block saying that it kills the incoming value!
     if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
       // In our final twist, we have to decide which instruction kills the
-      // register.  In most cases this is the copy, however, the first
-      // terminator instruction at the end of the block may also use the value.
-      // In this case, we should mark *it* as being the killing instruction,
-      // not the copy.
-      MachineBasicBlock::iterator KillInst;
-      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
-      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
-        KillInst = Term;
-
-        // Check that no other terminators use values.
-#ifndef NDEBUG
-        for (MachineBasicBlock::iterator TI = llvm::next(Term);
-             TI != opBlock.end(); ++TI) {
-          if (TI->isDebugValue())
-            continue;
-          assert(!TI->readsRegister(SrcReg) &&
-                 "Terminator instructions cannot use virtual registers unless"
-                 "they are the first terminator in a block!");
-        }
-#endif
-      } else if (reusedIncoming || !IncomingReg) {
-        // We may have to rewind a bit if we didn't insert a copy this time.
-        KillInst = Term;
-        while (KillInst != opBlock.begin()) {
-          --KillInst;
-          if (KillInst->isDebugValue())
-            continue;
-          if (KillInst->readsRegister(SrcReg))
-            break;
+      // register.  In most cases this is the copy, however, terminator
+      // instructions at the end of the block may also use the value. In this
+      // case, we should mark the last such terminator as being the killing
+      // instruction, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't insert a copy this time. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); } - } else { - // We just inserted this copy. - KillInst = prior(InsertPos); } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -412,28 +423,71 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. + const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0; + bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); + bool Changed = false; for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); - // We break edges when registers are live out from the predecessor block - // (not considering PHI nodes). If the register is live in to this block - // anyway, we would gain nothing from splitting. + // Is there a critical edge from PreMBB to MBB? + if (PreMBB->succ_size() == 1) + continue; + // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. - if (PreMBB != &MBB && - !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) { - if (!MLI || - !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) && - MLI->isLoopHeader(&MBB))) { - if (PreMBB->SplitCriticalEdge(&MBB, this)) { - Changed = true; - ++NumCriticalEdgesSplit; - } - } + if (PreMBB == &MBB) + continue; + const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0; + if (IsLoopHeader && PreLoop == CurLoop) + continue; + + // LV doesn't consider a phi use live-out, so isLiveOut only returns true + // when the source register is live-out for some other reason than a phi + // use. That means the copy we will insert in PreMBB won't be a kill, and + // there is a risk it may not be coalesced away. + // + // If the copy would be a kill, there is no need to split the edge. + if (!LV.isLiveOut(Reg, *PreMBB)) + continue; + + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + + // If Reg is not live-in to MBB, it means it must be live-in to some + // other PreMBB successor, and we can avoid the interference by splitting + // the edge. + // + // If Reg *is* live-in to MBB, the interference is inevitable and a copy + // is likely to be left after coalescing. If we are looking at a loop + // exiting edge, split it so we won't insert code in the loop, otherwise + // don't bother. + bool ShouldSplit = !LV.isLiveIn(Reg, MBB); + + // Check for a loop exiting edge. 
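// Taking the checks above together with the loop-exit refinement just below,
// the whole split decision can be summarized as one predicate (sketch only,
// not part of the patch; names match the surrounding code):
static bool shouldSplitEdge(unsigned Reg, MachineBasicBlock *PreMBB,
                            MachineBasicBlock &MBB, LiveVariables &LV,
                            const MachineLoop *PreLoop,
                            const MachineLoop *CurLoop, bool IsLoopHeader) {
  if (PreMBB->succ_size() == 1) return false;           // not a critical edge
  if (PreMBB == &MBB) return false;                     // self loop
  if (IsLoopHeader && PreLoop == CurLoop) return false; // loop backedge
  if (!LV.isLiveOut(Reg, *PreMBB)) return false;        // copy would be a kill
  if (!LV.isLiveIn(Reg, MBB)) return true;              // split removes interference
  // Live-in anyway: splitting only pays off when it keeps the copy out of a
  // loop, i.e. when this edge leaves PreLoop rather than entering CurLoop.
  return CurLoop != PreLoop && PreLoop && !PreLoop->contains(CurLoop);
}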
+      if (!ShouldSplit && CurLoop != PreLoop) {
+        DEBUG({
+          dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+          if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+          if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+        });
+        // This edge could be entering a loop, exiting a loop, or it could be
+        // both: Jumping directly from one loop to the header of a sibling
+        // loop.
+        // Split unless this edge is entering CurLoop from an outer loop.
+        ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+      }
+      if (!ShouldSplit)
+        continue;
+      if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+        DEBUG(dbgs() << "Failed to split critical edge.\n");
+        continue;
       }
+      Changed = true;
+      ++NumCriticalEdgesSplit;
     }
   }
   return Changed;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 490547b..cfa3eec 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -48,6 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
     cl::desc("Disable Stack Slot Coloring"));
 static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
     cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
+    cl::desc("Enable Early If-conversion"));
 static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
     cl::desc("Disable Machine LICM"));
 static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -80,15 +83,23 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
 static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+                   cl::desc("Print machine instrs"),
+                   cl::value_desc("pass-name"), cl::init("option-unspecified"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+    cl::desc("Run live interval analysis earlier in the pipeline"));
 
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
 /// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
   if (Override)
-    return &NoPassID;
-  return ID;
+    return 0;
+  return PassID;
 }
 
 /// Allow Pass selection to be overridden by command line options.
This supports
@@ -101,13 +112,13 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
   case cl::BOU_UNSET:
     return TargetID;
   case cl::BOU_TRUE:
-    if (TargetID != &NoPassID)
+    if (TargetID)
       return TargetID;
-    if (StandardID == &NoPassID)
+    if (StandardID == 0)
       report_fatal_error("Target cannot enable pass");
     return StandardID;
   case cl::BOU_FALSE:
-    return &NoPassID;
+    return 0;
   }
   llvm_unreachable("Invalid command line option state");
 }
@@ -149,6 +160,9 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
   if (StandardID == &DeadMachineInstructionElimID)
     return applyDisable(TargetID, DisableMachineDCE);
 
+  if (StandardID == &EarlyIfConverterID)
+    return applyDisable(TargetID, !EnableEarlyIfConversion);
+
   if (StandardID == &MachineLICMID)
     return applyDisable(TargetID, DisableMachineLICM);
 
@@ -178,9 +192,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
                 "Target Pass Configuration", false, false)
 char TargetPassConfig::ID = 0;
 
-static char NoPassIDAnchor = 0;
-char &llvm::NoPassID = NoPassIDAnchor;
-
 // Pseudo Pass IDs.
 char TargetPassConfig::EarlyTailDuplicateID = 0;
 char TargetPassConfig::PostRAMachineLICMID = 0;
@@ -193,9 +204,13 @@ public:
   // that are part of a standard pass pipeline without overriding the entire
   // pipeline. This mechanism allows target options to inherit a standard pass's
   // user interface. For example, a target may disable a standard pass by
-  // default by substituting NoPass, and the user may still enable that standard
-  // pass with an explicit command line option.
+  // default by substituting a pass ID of zero, and the user may still enable
+  // that standard pass with an explicit command line option.
   DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+  /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+  /// is inserted after each instance of the first one.
+  SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
 };
 } // namespace llvm
 
@@ -207,7 +222,8 @@ TargetPassConfig::~TargetPassConfig() {
 // Out of line constructor provides default values for pass options and
 // registers all common codegen passes.
 TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
-  : ImmutablePass(ID), TM(tm), PM(&pm), Impl(0), Initialized(false),
+  : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+    Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
     DisableVerify(false),
     EnableTailMerge(true) {
 
@@ -218,11 +234,22 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
   initializeCodeGen(*PassRegistry::getPassRegistry());
 
   // Substitute Pseudo Pass IDs for real ones.
-  substitutePass(EarlyTailDuplicateID, TailDuplicateID);
-  substitutePass(PostRAMachineLICMID, MachineLICMID);
+  substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+  substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+  // Disable early if-conversion. Targets that are ready can enable it.
+  disablePass(&EarlyIfConverterID);
 
   // Temporarily disable experimental passes.
-  substitutePass(MachineSchedulerID, NoPassID);
+  substitutePass(&MachineSchedulerID, 0);
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+                                  AnalysisID InsertedPassID) {
+  assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
+  std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+  Impl->InsertedPasses.push_back(P);
 }
 
 /// createPassConfig - Create a pass configuration object to be used by
@@ -244,8 +271,9 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
   Opt = Val;
 }
 
-void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) {
-  Impl->TargetPasses[&StandardID] = &TargetID;
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+                                      AnalysisID TargetID) {
+  Impl->TargetPasses[StandardID] = TargetID;
 }
 
 AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
@@ -256,29 +284,62 @@ AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
   return I->second;
 }
 
-/// Add a CodeGen pass at this point in the pipeline after checking for target
-/// and command line overrides.
-AnalysisID TargetPassConfig::addPass(char &ID) {
+/// Add a pass to the PassManager if that pass is supposed to be run.  If the
+/// Started/Stopped flags indicate either that the compilation should start at
+/// a later pass or that it should stop after an earlier pass, then do not add
+/// the pass.  Finally, compare the current pass against the StartAfter
+/// and StopAfter options and change the Started/Stopped flags accordingly.
+void TargetPassConfig::addPass(Pass *P) {
   assert(!Initialized && "PassConfig is immutable");
 
-  AnalysisID TargetID = getPassSubstitution(&ID);
-  AnalysisID FinalID = overridePass(&ID, TargetID);
-  if (FinalID == &NoPassID)
+  // Cache the Pass ID here in case the pass manager finds this pass is
+  // redundant with ones already scheduled / available, and deletes it.
+  // Fundamentally, once we add the pass to the manager, we no longer own it
+  // and shouldn't reference it.
+  AnalysisID PassID = P->getPassID();
+
+  if (Started && !Stopped)
+    PM->add(P);
+  if (StopAfter == PassID)
+    Stopped = true;
+  if (StartAfter == PassID)
+    Started = true;
+  if (Stopped && !Started)
+    report_fatal_error("Cannot stop compilation after pass that is not run");
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+  AnalysisID TargetID = getPassSubstitution(PassID);
+  AnalysisID FinalID = overridePass(PassID, TargetID);
+  if (FinalID == 0)
    return FinalID;
 
  Pass *P = Pass::createPass(FinalID);
  if (!P)
    llvm_unreachable("Pass ID not registered");
-  PM->add(P);
+  addPass(P);
+  // Add the passes after pass P, if there are any.
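// Standalone toy (plain C++, not LLVM code) of the Started/Stopped gating in
// addPass(Pass*) above, making the boundary behavior explicit: the StopAfter
// pass itself still runs, while the StartAfter pass itself is skipped.
#include <cassert>
struct GateModel {
  int StartAfter, StopAfter;          // 0 plays the role of "not requested"
  bool Started, Stopped;
  GateModel(int StartA, int StopA)
    : StartAfter(StartA), StopAfter(StopA),
      Started(StartA == 0), Stopped(false) {}
  bool add(int PassID) {              // returns whether the pass would run
    bool Runs = Started && !Stopped;  // the real code calls PM->add(P) here
    if (StopAfter == PassID) Stopped = true;
    if (StartAfter == PassID) Started = true;
    return Runs;
  }
};
int main() {
  GateModel G(/*StartAfter=*/1, /*StopAfter=*/3);
  assert(!G.add(1)); // the StartAfter pass itself does not run
  assert( G.add(2)); // everything strictly between the two bounds runs
  assert( G.add(3)); // the StopAfter pass still runs...
  assert(!G.add(4)); // ...and later passes are dropped
  return 0;
}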
+  for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+         I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
+       I != E; ++I) {
+    if ((*I).first == PassID) {
+      assert((*I).second && "Illegal Pass ID!");
+      Pass *NP = Pass::createPass((*I).second);
+      assert(NP && "Pass ID not registered");
+      addPass(NP);
+    }
+  }
   return FinalID;
 }
 
-void TargetPassConfig::printAndVerify(const char *Banner) const {
+void TargetPassConfig::printAndVerify(const char *Banner) {
   if (TM->shouldPrintMachineCode())
-    PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
+    addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
 
   if (VerifyMachineCode)
-    PM->add(createMachineVerifierPass(Banner));
+    addPass(createMachineVerifierPass(Banner));
 }
 
 /// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -288,46 +349,73 @@ void TargetPassConfig::addIRPasses() {
   // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
   // BasicAliasAnalysis wins if they disagree. This is intended to help
   // support "obvious" type-punning idioms.
-  PM->add(createTypeBasedAliasAnalysisPass());
-  PM->add(createBasicAliasAnalysisPass());
+  addPass(createTypeBasedAliasAnalysisPass());
+  addPass(createBasicAliasAnalysisPass());
 
   // Before running any passes, run the verifier to determine if the input
   // coming from the front-end and/or optimizer is valid.
   if (!DisableVerify)
-    PM->add(createVerifierPass());
+    addPass(createVerifierPass());
 
   // Run loop strength reduction before anything else.
   if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
-    PM->add(createLoopStrengthReducePass(getTargetLowering()));
+    addPass(createLoopStrengthReducePass(getTargetLowering()));
     if (PrintLSR)
-      PM->add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+      addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
   }
 
-  PM->add(createGCLoweringPass());
+  addPass(createGCLoweringPass());
 
   // Make sure that no unreachable blocks are instruction selected.
-  PM->add(createUnreachableBlockEliminationPass());
+  addPass(createUnreachableBlockEliminationPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+  switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+  case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both.
+    // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+    // catch info can get misplaced when a selector ends up more than one block
+    // removed from the parent invoke(s). This could happen when a landing
+    // pad is shared by multiple invokes and is also a target of a normal
+    // edge from elsewhere.
+    addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+    // FALLTHROUGH
+  case ExceptionHandling::DwarfCFI:
+  case ExceptionHandling::ARM:
+  case ExceptionHandling::Win64:
+    addPass(createDwarfEHPass(TM));
+    break;
+  case ExceptionHandling::None:
+    addPass(createLowerInvokePass(TM->getTargetLowering()));
+
+    // The lower invoke pass may create unreachable code. Remove it.
+    addPass(createUnreachableBlockEliminationPass());
+    break;
+  }
 }
 
 /// Add common passes that perform LLVM IR to IR transforms in preparation for
 /// instruction selection.
 void TargetPassConfig::addISelPrepare() {
   if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
-    PM->add(createCodeGenPreparePass(getTargetLowering()));
+    addPass(createCodeGenPreparePass(getTargetLowering()));
 
-  PM->add(createStackProtectorPass(getTargetLowering()));
+  addPass(createStackProtectorPass(getTargetLowering()));
 
   addPreISel();
 
   if (PrintISelInput)
-    PM->add(createPrintFunctionPass("\n\n"
+    addPass(createPrintFunctionPass("\n\n"
                                     "*** Final LLVM Code input to ISel ***\n",
                                     &dbgs()));
 
   // All passes which modify the LLVM IR are now complete; run the verifier
   // to ensure that the IR is valid.
   if (!DisableVerify)
-    PM->add(createVerifierPass());
+    addPass(createVerifierPass());
 }
 
 /// Add the complete set of target-independent postISel code generator passes.
@@ -349,11 +437,26 @@ void TargetPassConfig::addISelPrepare() {
 /// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
 /// before/after any target-independent pass. But it's currently overkill.
 void TargetPassConfig::addMachinePasses() {
+  // Insert a machine instr printer pass after the specified pass.
+  // If -print-machineinstrs is specified, print machineinstrs after all passes.
+  if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+    TM->Options.PrintMachineCode = true;
+  else if (!StringRef(PrintMachineInstrs.getValue())
+           .equals("option-unspecified")) {
+    const PassRegistry *PR = PassRegistry::getPassRegistry();
+    const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+    const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
+    assert(TPI && IPI && "Pass ID not registered!");
+    const char *TID = (char *)(TPI->getTypeInfo());
+    const char *IID = (char *)(IPI->getTypeInfo());
+    insertPass(TID, IID);
+  }
+
   // Print the instruction selected machine code...
   printAndVerify("After Instruction Selection");
 
   // Expand pseudo-instructions emitted by ISel.
-  addPass(ExpandISelPseudosID);
+  addPass(&ExpandISelPseudosID);
 
   // Add passes that optimize machine instructions in SSA form.
   if (getOptLevel() != CodeGenOpt::None) {
@@ -362,7 +465,7 @@ void TargetPassConfig::addMachinePasses() {
   else {
     // If the target requests it, assign local variables to stack slots relative
     // to one another and simplify frame index references where possible.
-    addPass(LocalStackSlotAllocationID);
+    addPass(&LocalStackSlotAllocationID);
   }
 
   // Run pre-ra passes.
@@ -381,7 +484,7 @@ void TargetPassConfig::addMachinePasses() {
   printAndVerify("After PostRegAlloc passes");
 
   // Insert prolog/epilog code.  Eliminate abstract frame index references...
-  addPass(PrologEpilogCodeInserterID);
+  addPass(&PrologEpilogCodeInserterID);
   printAndVerify("After PrologEpilogCodeInserter");
 
   /// Add passes that optimize machine instructions after register allocation.
@@ -389,7 +492,7 @@ void TargetPassConfig::addMachinePasses() {
   addMachineLateOptimization();
 
   // Expand pseudo instructions before second scheduling pass.
-  addPass(ExpandPostRAPseudosID);
+  addPass(&ExpandPostRAPseudosID);
   printAndVerify("After ExpandPostRAPseudos");
 
   // Run pre-sched2 passes.
@@ -398,14 +501,14 @@ void TargetPassConfig::addMachinePasses() {
 
   // Second pass scheduler.
   if (getOptLevel() != CodeGenOpt::None) {
-    addPass(PostRASchedulerID);
+    addPass(&PostRASchedulerID);
     printAndVerify("After PostRAScheduler");
   }
 
   // GC
-  addPass(GCMachineCodeAnalysisID);
+  addPass(&GCMachineCodeAnalysisID);
   if (PrintGCInfo)
-    PM->add(createGCInfoPrinter(dbgs()));
+    addPass(createGCInfoPrinter(dbgs()));
 
   // Basic block placement.
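// Example use of the option handled above (assuming an llc built from this
// revision; <pass-name> is any name registered with the PassRegistry):
//
//   llc -print-machineinstrs foo.ll              ; print after every pass
//   llc -print-machineinstrs=<pass-name> foo.ll  ; print after that pass only
//
// The named form resolves both pass infos through the PassRegistry and uses
// the new insertPass() hook to schedule the printer after each instance of
// the named pass.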
if (getOptLevel() != CodeGenOpt::None) @@ -418,30 +521,31 @@ void TargetPassConfig::addMachinePasses() { /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - if (addPass(EarlyTailDuplicateID) != &NoPassID) + if (addPass(&EarlyTailDuplicateID)) printAndVerify("After Pre-RegAlloc TailDuplicate"); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - addPass(OptimizePHIsID); + addPass(&OptimizePHIsID); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID); // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - addPass(DeadMachineInstructionElimID); + addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen DCE pass"); - addPass(MachineLICMID); - addPass(MachineCSEID); - addPass(MachineSinkingID); + addPass(&EarlyIfConverterID); + addPass(&MachineLICMID); + addPass(&MachineCSEID); + addPass(&MachineSinkingID); printAndVerify("After Machine LICM, CSE and Sinking passes"); - addPass(PeepholeOptimizerID); + addPass(&PeepholeOptimizerID); printAndVerify("After codegen peephole optimization pass"); } @@ -519,10 +623,10 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { - addPass(PHIEliminationID); - addPass(TwoAddressInstructionPassID); + addPass(&PHIEliminationID); + addPass(&TwoAddressInstructionPassID); - PM->add(RegAllocPass); + addPass(RegAllocPass); printAndVerify("After Register Allocation"); } @@ -530,42 +634,51 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&ProcessImplicitDefsID); + // LiveVariables currently requires pure SSA form. // // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, // LiveVariables can be removed completely, and LiveIntervals can be directly // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). - addPass(LiveVariablesID); + addPass(&LiveVariablesID); // Add passes that move from transformed SSA into conventional SSA. This is a // "copy coalescing" problem. // if (!EnableStrongPHIElim) { // Edge splitting is smarter with machine loop info. - addPass(MachineLoopInfoID); - addPass(PHIEliminationID); + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); } - addPass(TwoAddressInstructionPassID); - // FIXME: Either remove this pass completely, or fix it so that it works on - // SSA form. We could modify LiveIntervals to be independent of this pass, But - // it would be even better to simply eliminate *all* IMPLICIT_DEFs before - // leaving SSA. - addPass(ProcessImplicitDefsID); + // Eventually, we want to run LiveIntervals before PHI elimination. 
+  if (EarlyLiveIntervals)
+    addPass(&LiveIntervalsID);
+
+  addPass(&TwoAddressInstructionPassID);
 
   if (EnableStrongPHIElim)
-    addPass(StrongPHIEliminationID);
+    addPass(&StrongPHIEliminationID);
 
-  addPass(RegisterCoalescerID);
+  addPass(&RegisterCoalescerID);
 
   // PreRA instruction scheduling.
-  if (addPass(MachineSchedulerID) != &NoPassID)
+  if (addPass(&MachineSchedulerID))
     printAndVerify("After Machine Scheduling");
 
   // Add the selected register allocation pass.
-  PM->add(RegAllocPass);
-  printAndVerify("After Register Allocation");
+  addPass(RegAllocPass);
+  printAndVerify("After Register Allocation, before rewriter");
+
+  // Allow targets to change the register assignments before rewriting.
+  if (addPreRewrite())
+    printAndVerify("After pre-rewrite passes");
+
+  // Finally rewrite virtual registers.
+  addPass(&VirtRegRewriterID);
+  printAndVerify("After Virtual Register Rewriter");
 
   // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
   // but eventually, all users of it should probably be moved to addPostRA and
@@ -579,12 +692,12 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
   //
   // FIXME: Re-enable coloring with register when it's capable of adding
   // kill markers.
-  addPass(StackSlotColoringID);
+  addPass(&StackSlotColoringID);
 
   // Run post-ra machine LICM to hoist reloads / remats.
   //
   // FIXME: can this move into MachineLateOptimization?
-  addPass(PostRAMachineLICMID);
+  addPass(&PostRAMachineLICMID);
 
   printAndVerify("After StackSlotColoring and postra Machine LICM");
 }
@@ -596,33 +709,33 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
 /// Add passes that optimize machine instructions after register allocation.
 void TargetPassConfig::addMachineLateOptimization() {
   // Branch folding must be run after regalloc and prolog/epilog insertion.
-  if (addPass(BranchFolderPassID) != &NoPassID)
+  if (addPass(&BranchFolderPassID))
     printAndVerify("After BranchFolding");
 
   // Tail duplication.
-  if (addPass(TailDuplicateID) != &NoPassID)
+  if (addPass(&TailDuplicateID))
     printAndVerify("After TailDuplicate");
 
   // Copy propagation.
-  if (addPass(MachineCopyPropagationID) != &NoPassID)
+  if (addPass(&MachineCopyPropagationID))
     printAndVerify("After copy propagation pass");
 }
 
 /// Add standard basic block placement passes.
 void TargetPassConfig::addBlockPlacement() {
-  AnalysisID ID = &NoPassID;
+  AnalysisID PassID = 0;
   if (!DisableBlockPlacement) {
     // MachineBlockPlacement is a new pass which subsumes the functionality of
     // CodePlacementOpt. The old code placement pass can be restored by
     // disabling block placement, but eventually it will be removed.
-    ID = addPass(MachineBlockPlacementID);
+    PassID = addPass(&MachineBlockPlacementID);
   } else {
-    ID = addPass(CodePlacementOptID);
+    PassID = addPass(&CodePlacementOptID);
   }
 
-  if (ID != &NoPassID) {
+  if (PassID) {
     // Run a separate pass to collect block placement statistics.
     if (EnableBlockPlacementStats)
-      addPass(MachineBlockPlacementStatsID);
+      addPass(&MachineBlockPlacementStatsID);
 
     printAndVerify("After machine block placement.");
   }
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9c5c029..6bc7e37 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -31,6 +31,15 @@
 // same flag that the "cmp" instruction sets and that "bz" uses, then we can
 // eliminate the "cmp" instruction.
 //
+// Another instance, in this code:
+//
+//   sub r1, r3 | sub r1, imm
+//   cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+//   bge L1
+//
+// If the branch instruction can use the flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
 // - Optimize Bitcast pairs:
 //
 //   v1 = bitcast v0
@@ -69,6 +78,7 @@ STATISTIC(NumReuse,      "Number of extension results reused");
 STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
 STATISTIC(NumCmps,       "Number of compares eliminated");
 STATISTIC(NumImmFold,    "Number of move immediate folded");
+STATISTIC(NumLoadFold,   "Number of loads folded");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -95,16 +105,17 @@ namespace {
     }
 
   private:
-    bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
-    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
-    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+    bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
     bool isMoveImmediate(MachineInstr *MI,
                          SmallSet<unsigned, 4> &ImmDefRegs,
                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
-    bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+    bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
   };
 }
 
@@ -116,7 +127,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
                 "Peephole Optimizations", false, false)
 
-/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
 /// a single register and writes a single register and it does not modify the
 /// source, and if the source value is preserved as a sub-register of the
 /// result, then replace all reachable uses of the source with the subreg of the
@@ -126,7 +137,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
 /// the code. Since this code does not currently share EXTRACTs, just ignore all
 /// debug uses.
 bool PeepholeOptimizer::
-OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                  SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
   unsigned SrcReg, DstReg, SubIdx;
   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
@@ -136,16 +147,30 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
       TargetRegisterInfo::isPhysicalRegister(SrcReg))
     return false;
 
-  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
-  if (++UI == MRI->use_nodbg_end())
+  if (MRI->hasOneNonDBGUse(SrcReg))
     // No other uses.
     return false;
 
+  // Ensure DstReg can get a register class that actually supports
+  // sub-registers. Don't change the class until we commit.
+  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+  if (!DstRC)
+    return false;
+
+  // The ext instr may be operating on a sub-register of SrcReg as well.
+  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+  // register.
+  // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
+  // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx = TM->getRegisterInfo()-> + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); @@ -156,8 +181,8 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; - UI = MRI->use_nodbg_begin(SrcReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; @@ -169,6 +194,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; } + // Only accept uses of SrcReg:SubIdx. + if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) + continue; + // It's an error to translate this: // // %reg1025 = <sext> %reg1024 @@ -223,9 +252,9 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. - UI = MRI->use_nodbg_begin(DstReg); for (MachineRegisterInfo::use_nodbg_iterator - UE = MRI->use_nodbg_end(); UI != UE; ++UI) + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); + UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); @@ -238,14 +267,20 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; // About to add uses of DstReg, clear DstReg's kill flags. - if (!Changed) + if (!Changed) { MRI->clearKillFlags(DstReg); + MRI->constrainRegClass(DstReg, DstRC); + } unsigned NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) + MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); - + // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. + if (UseSrcSubIdx) { + Copy->getOperand(0).setSubReg(SubIdx); + Copy->getOperand(0).setIsUndef(); + } UseMO->setReg(NewVR); ++NumReuse; Changed = true; @@ -255,7 +290,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that +/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that /// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast /// a value cross register classes), and the source is defined by another /// bitcast instruction B. And if the register class of source of B matches @@ -265,7 +300,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, /// %vreg3<def> = VMOVRS %vreg0 /// Replace all uses of vreg3 with vreg1. 
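// Overall shape of the rewrite optimizeExtInstr (above) performs — an
// illustrative sketch in the file's own notation; register numbers are
// invented and <ext> stands for any extension recognized by
// isCoalescableExtInstr:
//
//   %vreg1<def> = <ext> %vreg0
//   ...         = use %vreg0          ; other uses of the narrow value
// becomes
//   %vreg1<def> = <ext> %vreg0
//   %vreg2<def> = COPY %vreg1:SubIdx  ; reuse the low part of the result
//   ...         = use %vreg2
//
// With UseSrcSubIdx (added above), only uses of %vreg0:SubIdx are rewritten,
// and the COPY's def operand carries the same sub-register index.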
-bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
                                              MachineBasicBlock *MBB) {
   unsigned NumDefs = MI->getDesc().getNumDefs();
   unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
@@ -327,22 +362,23 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
   return true;
 }
 
-/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// optimizeCmpInstr - If the instruction is a compare and the previous
 /// instruction it's comparing against already sets (or could be modified to
 /// set) the same flag as the compare, then we can remove the comparison and use
 /// the flag from the previous instruction.
-bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
                                          MachineBasicBlock *MBB) {
   // If this instruction is a comparison against zero and isn't comparing a
   // physical register, we can try to optimize it.
-  unsigned SrcReg;
+  unsigned SrcReg, SrcReg2;
   int CmpMask, CmpValue;
-  if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
-      TargetRegisterInfo::isPhysicalRegister(SrcReg))
+  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+      (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
     return false;
 
   // Attempt to optimize the comparison instruction.
-  if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
     ++NumCmps;
     return true;
   }
@@ -350,6 +386,30 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
   return false;
 }
 
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads that define a virtual register, and only
+/// when that register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+                                       unsigned &FoldAsLoadDefReg) {
+  if (!MI->canFoldAsLoad() || !MI->mayLoad())
+    return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.getNumDefs() != 1)
+    return false;
+
+  unsigned Reg = MI->getOperand(0).getReg();
+  // To reduce compilation time, we check MRI->hasOneUse when inserting
+  // loads. It should be checked when processing uses of the load, since
+  // uses can be removed during peephole.
+  if (!MI->getOperand(0).getSubReg() &&
+      TargetRegisterInfo::isVirtualRegister(Reg) &&
+      MRI->hasOneUse(Reg)) {
+    FoldAsLoadDefReg = Reg;
+    return true;
+  }
+  return false;
+}
+
 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                         SmallSet<unsigned, 4> &ImmDefRegs,
                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -368,10 +428,10 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
   return false;
 }
 
-/// FoldImmediate - Try folding register operands that are defined by move
+/// foldImmediate - Try folding register operands that are defined by move
 /// immediate instructions, i.e. a trivial constant folding optimization, if
 /// and only if the def and use are in the same BB.
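// How the new isLoadFoldable candidate feeds the rest of the pass — a
// summary only; the driving loop appears in runOnMachineFunction below:
//
//   FoldAsLoadDefReg = 0;                 // start each block with no candidate
//   for each MI:
//     label/PHI/inline asm/side effects -> drop the candidate
//     store or call                     -> drop the candidate
//     isLoadFoldable(MI, ...)           -> remember MI's def as the candidate
//     otherwise, with a candidate live, try TII->optimizeLoadInstr(MI, ...);
//     on success both the load and the original user are erased.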
-bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, +bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -407,6 +467,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -414,6 +475,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; bool First = true; MachineBasicBlock::iterator PMII; @@ -422,15 +484,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = &*MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { + FoldAsLoadDefReg = 0; ++MII; continue; } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; if (MI->isBitcast()) { - if (OptimizeBitcastInstr(MI, MBB)) { + if (optimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; @@ -438,7 +505,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } } else if (MI->isCompare()) { - if (OptimizeCmpInstr(MI, MBB)) { + if (optimizeCmpInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; @@ -450,11 +517,36 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { - Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); + Changed |= optimizeExtInstr(MI, MBB, LocalMIs); if (SeenMoveImm) - Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); + Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. 
+ Changed = true; + PMII = FoldMI; + MII = llvm::next(PMII); + continue; + } + } First = false; PMII = MII; ++MII; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 24d3e5a..7449ff5 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,7 +22,6 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -31,6 +30,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -78,7 +78,6 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { - AliasAnalysis *AA; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; @@ -206,6 +205,10 @@ SchedulePostRATDList::SchedulePostRATDList( const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); HazardRec = TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + + assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || + MRI.tracksLiveness()) && + "Live-ins must be accurate for anti-dependency breaking"); AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : @@ -423,9 +426,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); } } else { @@ -437,9 +439,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); } } } @@ -464,10 +465,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); - for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg); - *Subreg; ++Subreg) { - if (LiveRegs.test(*Subreg)) { - MI->addOperand(MachineOperand::CreateReg(*Subreg, + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MI->addOperand(MachineOperand::CreateReg(*SubRegs, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/, @@ -517,9 +517,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { LiveRegs.reset(Reg); // Repeat for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.reset(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); } // Examine all used registers and set/clear kill flag. When a @@ -536,9 +535,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if (!killedRegs.test(Reg)) { kill = true; // A register is not killed if any subregs are live... 
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - if (LiveRegs.test(*Subreg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { kill = false; break; } @@ -570,9 +568,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { LiveRegs.set(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); } } } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 1ad3479..34d075c 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -9,297 +9,163 @@ #define DEBUG_TYPE "processimplicitdefs" -#include "llvm/CodeGen/ProcessImplicitDefs.h" - -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" - using namespace llvm; +namespace { +/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def +/// for each use. Add isUndef marker to implicit_def defs and their uses. +class ProcessImplicitDefs : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + SmallSetVector<MachineInstr*, 16> WorkList; + + void processImplicitDef(MachineInstr *MI); + bool canTurnIntoImplicitDef(MachineInstr *MI); + +public: + static char ID; + + ProcessImplicitDefs() : MachineFunctionPass(ID) { + initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); +}; +} // end anonymous namespace + char ProcessImplicitDefs::ID = 0; char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(TwoAddressInstructionPassID); - AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } -bool -ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - SmallSet<unsigned, 8> &ImpDefRegs) { - switch(OpIdx) { - case 1: - return MI->isCopy() && (!MI->getOperand(0).readsReg() || - ImpDefRegs.count(MI->getOperand(0).getReg())); - case 2: - return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || - ImpDefRegs.count(MI->getOperand(0).getReg())); - default: return false; - } -} - -static bool isUndefCopy(MachineInstr *MI, unsigned Reg, - SmallSet<unsigned, 8> &ImpDefRegs) { - if (MI->isCopy()) { - MachineOperand &MO0 = MI->getOperand(0); - MachineOperand &MO1 = MI->getOperand(1); - if 
(MO1.getReg() != Reg)
-      return false;
-    if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
-      return true;
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+  if (!MI->isCopyLike() &&
+      !MI->isInsertSubreg() &&
+      !MI->isRegSequence() &&
+      !MI->isPHI())
     return false;
-  }
-  return false;
+  for (MIOperands MO(MI); MO.isValid(); ++MO)
+    if (MO->isReg() && MO->isUse() && MO->readsReg())
+      return false;
+  return true;
 }
 
-/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
-/// there is one implicit_def for each use. Add isUndef marker to
-/// implicit_def defs and their uses.
-bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
-
-  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
-               << "********** Function: "
-               << ((Value*)fn.getFunction())->getName() << '\n');
-
-  bool Changed = false;
-
-  TII = fn.getTarget().getInstrInfo();
-  TRI = fn.getTarget().getRegisterInfo();
-  MRI = &fn.getRegInfo();
-  LV = getAnalysisIfAvailable<LiveVariables>();
-
-  SmallSet<unsigned, 8> ImpDefRegs;
-  SmallVector<MachineInstr*, 8> ImpDefMIs;
-  SmallVector<MachineInstr*, 4> RUses;
-  SmallPtrSet<MachineBasicBlock*,16> Visited;
-  SmallPtrSet<MachineInstr*, 8> ModInsts;
-
-  MachineBasicBlock *Entry = fn.begin();
-  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
-         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
-       DFI != E; ++DFI) {
-    MachineBasicBlock *MBB = *DFI;
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         I != E; ) {
-      MachineInstr *MI = &*I;
-      ++I;
-      if (MI->isImplicitDef()) {
-        ImpDefMIs.push_back(MI);
-        // Is this a sub-register read-modify-write?
-        if (MI->getOperand(0).readsReg())
-          continue;
-        unsigned Reg = MI->getOperand(0).getReg();
-        ImpDefRegs.insert(Reg);
-        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-          for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
-            ImpDefRegs.insert(*SS);
-        }
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+  DEBUG(dbgs() << "Processing " << *MI);
+  unsigned Reg = MI->getOperand(0).getReg();
+
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    // For virtual registers, mark all uses as <undef>, and convert users to
+    // implicit-def when possible.
+    for (MachineRegisterInfo::use_nodbg_iterator UI =
+         MRI->use_nodbg_begin(Reg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+      MachineOperand &MO = UI.getOperand();
+      MO.setIsUndef();
+      MachineInstr *UserMI = MO.getParent();
+      if (!canTurnIntoImplicitDef(UserMI))
         continue;
-      }
-
-      // Eliminate %reg1032:sub<def> = COPY undef.
-      if (MI->isCopy() && MI->getOperand(0).readsReg()) {
-        MachineOperand &MO = MI->getOperand(1);
-        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
-          if (LV && MO.isKill()) {
-            LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
-            vi.removeKill(MI);
-          }
-          unsigned Reg = MI->getOperand(0).getReg();
-          MI->eraseFromParent();
-          Changed = true;
-
-          // A REG_SEQUENCE may have been expanded into partial definitions.
-          // If this was the last one, mark Reg as implicitly defined.
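// The virtual-register half above is deliberately simple: every use of the
// IMPLICIT_DEF'd vreg is marked <undef>, and any copy-like user with no
// remaining real reads is itself turned into an IMPLICIT_DEF and pushed on
// the worklist, so chains of undef copies collapse recursively.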
- if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg)) - ImpDefRegs.insert(Reg); - continue; - } - } - - bool ChangedToImpDef = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.readsReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (!ImpDefRegs.count(Reg)) - continue; - // Use is a copy, just turn it into an implicit_def. - if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { - bool isKill = MO.isKill(); - MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); - if (isKill) { - ImpDefRegs.erase(Reg); - if (LV) { - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(MI); - } - } - ChangedToImpDef = true; - Changed = true; - break; - } - - Changed = true; - MO.setIsUndef(); - // This is a partial register redef of an implicit def. - // Make sure the whole register is defined by the instruction. - if (MO.isDef()) { - MI->addRegisterDefined(Reg); - continue; - } - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other reads of Reg are also marked <undef>. - for (unsigned j = i+1; j != e; ++j) { - MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) - MOJ.setIsUndef(); - } - ImpDefRegs.erase(Reg); - } - } - - if (ChangedToImpDef) { - // Backtrack to process this new implicit_def. - --I; - } else { - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - ImpDefRegs.erase(MO.getReg()); - } - } + DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); + UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + WorkList.insert(UserMI); } + MI->eraseFromParent(); + return; + } - // Any outstanding liveout implicit_def's? - for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { - MachineInstr *MI = ImpDefMIs[i]; - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !ImpDefRegs.count(Reg)) { - // Delete all "local" implicit_def's. That include those which define - // physical registers since they cannot be liveout. - MI->eraseFromParent(); - Changed = true; + // This is a physreg implicit-def. + // Look for the first instruction to use or define an alias. + MachineBasicBlock::instr_iterator UserMI = MI; + MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); + bool Found = false; + for (++UserMI; UserMI != UserE; ++UserMI) { + for (MIOperands MO(UserMI); MO.isValid(); ++MO) { + if (!MO->isReg()) continue; - } - - // If there are multiple defs of the same register and at least one - // is not an implicit_def, do not insert implicit_def's before the - // uses. - bool Skip = false; - SmallVector<MachineInstr*, 4> DeadImpDefs; - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), - DE = MRI->def_end(); DI != DE; ++DI) { - MachineInstr *DeadImpDef = &*DI; - if (!DeadImpDef->isImplicitDef()) { - Skip = true; - break; - } - DeadImpDefs.push_back(DeadImpDef); - } - if (Skip) + unsigned UserReg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + !TRI->regsOverlap(Reg, UserReg)) continue; + // UserMI uses or redefines Reg. Set <undef> flags on all uses. 
+ Found = true; + if (MO->isUse()) + MO->setIsUndef(); + } + if (Found) + break; + } - // The only implicit_def which we want to keep are those that are live - // out of its block. - for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j) - DeadImpDefs[j]->eraseFromParent(); - Changed = true; - - // Process each use instruction once. - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - if (UI.getOperand().isUndef()) - continue; - MachineInstr *RMI = &*UI; - if (ModInsts.insert(RMI)) - RUses.push_back(RMI); - } + // If we found the using MI, we can erase the IMPLICIT_DEF. + if (Found) { + DEBUG(dbgs() << "Physreg user: " << *UserMI); + MI->eraseFromParent(); + return; + } - for (unsigned i = 0, e = RUses.size(); i != e; ++i) { - MachineInstr *RMI = RUses[i]; + // The using instruction wasn't found; it could be in another block. + // Leave the physreg IMPLICIT_DEF, but trim any extra operands. + for (unsigned i = MI->getNumOperands() - 1; i; --i) + MI->RemoveOperand(i); + DEBUG(dbgs() << "Keeping physreg: " << *MI); +} - // Turn a copy use into an implicit_def. - if (isUndefCopy(RMI, Reg, ImpDefRegs)) { - RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); +/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into +/// <undef> operands. +bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { - bool isKill = false; - SmallVector<unsigned, 4> Ops; - for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { - MachineOperand &RRMO = RMI->getOperand(j); - if (RRMO.isReg() && RRMO.getReg() == Reg) { - Ops.push_back(j); - if (RRMO.isKill()) - isKill = true; - } - } - // Leave the other operands along. - for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) { - unsigned OpIdx = Ops[j]; - RMI->RemoveOperand(OpIdx-j); - } + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" + << "********** Function: " + << ((Value*)MF.getFunction())->getName() << '\n'); - // Update LiveVariables varinfo if the instruction is a kill. - if (LV && isKill) { - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(RMI); - } - continue; - } + bool Changed = false; - // Replace Reg with a new vreg that's marked implicit. - const TargetRegisterClass* RC = MRI->getRegClass(Reg); - unsigned NewVReg = MRI->createVirtualRegister(RC); - bool isKill = true; - for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { - MachineOperand &RRMO = RMI->getOperand(j); - if (RRMO.isReg() && RRMO.getReg() == Reg) { - RRMO.setReg(NewVReg); - RRMO.setIsUndef(); - if (isKill) { - // Only the first operand of NewVReg is marked kill. - RRMO.setIsKill(); - isKill = false; - } - } - } - } - RUses.clear(); - ModInsts.clear(); - } - ImpDefRegs.clear(); - ImpDefMIs.clear(); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); + assert(WorkList.empty() && "Inconsistent worklist state"); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); + MFI != MFE; ++MFI) { + // Scan the basic block for implicit defs.
+ for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) + if (MBBI->isImplicitDef()) + WorkList.insert(MBBI); + + if (WorkList.empty()) + continue; + + DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size() + << " implicit defs.\n"); + Changed = true; + + // Drain the WorkList to recursively process any new implicit defs. + do processImplicitDef(WorkList.pop_back_val()); + while (!WorkList.empty()); } - return Changed; } - diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 458915e..c791ffb 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -302,7 +302,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; - if (! ShrinkWrapThisFunction) { + if (!ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index b00eceb..993dbc7 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" +#include "LiveRegMatrix.h" #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" @@ -34,8 +35,6 @@ using namespace llvm; -STATISTIC(NumAssigned , "Number of registers assigned"); -STATISTIC(NumUnassigned , "Number of registers unassigned"); STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside @@ -47,85 +46,20 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), const char *RegAllocBase::TimerGroupName = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; -#ifndef NDEBUG -// Verify each LiveIntervalUnion. -void RegAllocBase::verify() { - LiveVirtRegBitSet VisitedVRegs; - OwningArrayPtr<LiveVirtRegBitSet> - unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); - - // Verify disjoint unions. - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); - LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; - PhysReg2LiveUnion[PhysReg].verify(VRegs); - // Union + intersection test could be done efficiently in one pass, but - // don't add a method to SparseBitVector unless we really need it. - assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); - VisitedVRegs |= VRegs; - } - - // Verify vreg coverage. - for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); - liItr != liEnd; ++liItr) { - unsigned reg = liItr->first; - if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; - if (!VRM->hasPhys(reg)) continue; // spilled? - unsigned PhysReg = VRM->getPhys(reg); - if (!unionVRegs[PhysReg].test(reg)) { - dbgs() << "LiveVirtReg " << reg << " not in union " << - TRI->getName(PhysReg) << "\n"; - llvm_unreachable("unallocated live vreg"); - } - } - // FIXME: I'm not sure how to verify spilled intervals. -} -#endif //!NDEBUG - //===----------------------------------------------------------------------===// // RegAllocBase Implementation //===----------------------------------------------------------------------===// -// Instantiate a LiveIntervalUnion for each physical register. 
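// Aside: the runOnMachineFunction driver above depends on two properties of
// its WorkList: insert() deduplicates, and pop_back_val() pops in LIFO order,
// so users that were just converted to IMPLICIT_DEF are reprocessed before
// moving on. A minimal sketch of such a container built from standard library
// parts follows -- a toy stand-in for llvm::SmallSetVector, not the real
// class.
#include <set>
#include <vector>

template <typename T> class ToyWorkList {
  std::vector<T> Vec; // insertion order; popped from the back
  std::set<T> Seen;   // membership test for deduplication
public:
  bool empty() const { return Vec.empty(); }
  void insert(const T &V) {
    if (Seen.insert(V).second) // already queued? then ignore
      Vec.push_back(V);
  }
  T pop_back_val() {
    T V = Vec.back();
    Vec.pop_back();
    Seen.erase(V);
    return V;
  }
};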
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, - unsigned NRegs) { - NumRegs = NRegs; - Array = - static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); - for (unsigned r = 0; r != NRegs; ++r) - new(Array + r) LiveIntervalUnion(r, allocator); -} - -void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { - NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); +void RegAllocBase::init(VirtRegMap &vrm, + LiveIntervals &lis, + LiveRegMatrix &mat) { TRI = &vrm.getTargetRegInfo(); MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; + Matrix = &mat; MRI->freezeReservedRegs(vrm.getMachineFunction()); RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); - - const unsigned NumRegs = TRI->getNumRegs(); - if (NumRegs != PhysReg2LiveUnion.numRegs()) { - PhysReg2LiveUnion.init(UnionAllocator, NumRegs); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); - } -} - -void RegAllocBase::LiveUnionArray::clear() { - if (!Array) - return; - for (unsigned r = 0; r != NumRegs; ++r) - Array[r].~LiveIntervalUnion(); - free(Array); - NumRegs = 0; - Array = 0; -} - -void RegAllocBase::releaseMemory() { - for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) - PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. If they are already assigned to a physical @@ -133,35 +67,14 @@ void RegAllocBase::releaseMemory() { // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { - unsigned RegNum = I->first; - LiveInterval &VirtReg = *I->second; - if (TargetRegisterInfo::isPhysicalRegister(RegNum)) - PhysReg2LiveUnion[RegNum].unify(VirtReg); - else - enqueue(&VirtReg); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + enqueue(&LIS->getInterval(Reg)); } } -void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << '\n'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); - PhysReg2LiveUnion[PhysReg].unify(VirtReg); - ++NumAssigned; -} - -void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << '\n'); - assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); - PhysReg2LiveUnion[PhysReg].extract(VirtReg); - VRM->clearVirt(VirtReg.reg); - ++NumUnassigned; -} - // Top-level driver to manage the queue of unassigned VirtRegs and call the // selectOrSplit implementation. void RegAllocBase::allocatePhysRegs() { @@ -179,14 +92,14 @@ void RegAllocBase::allocatePhysRegs() { } // Invalidate all interference queries, live ranges could have changed. - invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. 
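// Aside: the deleted per-physreg LiveIntervalUnion array gives way to the
// LiveRegMatrix, which keys interference on register *units*: a physical
// register interferes with a virtual register's live interval iff the live
// range of any of its units overlaps it. A distilled sketch of that test,
// with Segment and the per-unit range table as toy stand-ins for LLVM's
// interval types:
#include <vector>

struct Segment { unsigned Start, End; }; // half-open [Start, End)

static bool overlaps(const std::vector<Segment> &A,
                     const std::vector<Segment> &B) {
  // Classic merge-walk over two sorted, disjoint segment lists.
  unsigned i = 0, j = 0;
  while (i < A.size() && j < B.size()) {
    if (A[i].End <= B[j].Start)
      ++i;
    else if (B[j].End <= A[i].Start)
      ++j;
    else
      return true; // the segments intersect
  }
  return false;
}

static bool interferes(const std::vector<Segment> &VirtReg,
                       const std::vector<std::vector<Segment> > &UnitRanges) {
  for (unsigned u = 0, e = UnitRanges.size(); u != e; ++u)
    if (overlaps(VirtReg, UnitRanges[u])) // one range per register unit
      return true;
  return false;
}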
DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); + << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n'); typedef SmallVector<LiveInterval*, 4> VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); @@ -211,7 +124,7 @@ void RegAllocBase::allocatePhysRegs() { } if (AvailablePhysReg) - assign(*VirtReg, AvailablePhysReg); + Matrix->assign(*VirtReg, AvailablePhysReg); for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { @@ -230,51 +143,3 @@ void RegAllocBase::allocatePhysRegs() { } } } - -// Check if this live virtual register interferes with a physical register. If -// not, then check for interference on each register that aliases with the -// physical register. Return the interfering register. -unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - if (query(VirtReg, *AliasI).checkInterference()) - return *AliasI; - return 0; -} - -// Add newly allocated physical registers to the MBB live in sets. -void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { - NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - SlotIndexes *Indexes = LIS->getSlotIndexes(); - if (MF->size() <= 1) - return; - - LiveIntervalUnion::SegmentIter SI; - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; - if (LiveUnion.empty()) - continue; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); - MachineFunction::iterator MBB = llvm::next(MF->begin()); - MachineFunction::iterator MFE = MF->end(); - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.setMap(LiveUnion.getMap()); - SI.find(Start); - while (SI.valid()) { - if (SI.start() <= Start) { - if (!MBB->isLiveIn(PhysReg)) - MBB->addLiveIn(PhysReg); - DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' - << PrintReg(SI.value()->reg, TRI)); - } else if (SI.start() > Stop) - MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); - if (++MBB == MFE) - break; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.advanceTo(Start); - } - DEBUG(dbgs() << '\n'); - } -} - diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 072fe2b..db0c8e1 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -37,9 +37,9 @@ #ifndef LLVM_CODEGEN_REGALLOCBASE #define LLVM_CODEGEN_REGALLOCBASE -#include "llvm/ADT/OwningPtr.h" #include "LiveIntervalUnion.h" -#include "RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/ADT/OwningPtr.h" namespace llvm { @@ -47,6 +47,7 @@ template<typename T> class SmallVectorImpl; class TargetRegisterInfo; class VirtRegMap; class LiveIntervals; +class LiveRegMatrix; class Spiller; /// RegAllocBase provides the register allocation driver and interface that can @@ -56,69 +57,20 @@ class Spiller; /// live range splitting. They must also override enqueue/dequeue to provide an /// assignment order. class RegAllocBase { - LiveIntervalUnion::Allocator UnionAllocator; - - // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual - // registers may have changed. - unsigned UserTag; - - // Array of LiveIntervalUnions indexed by physical register. 
- class LiveUnionArray { - unsigned NumRegs; - LiveIntervalUnion *Array; - public: - LiveUnionArray(): NumRegs(0), Array(0) {} - ~LiveUnionArray() { clear(); } - - unsigned numRegs() const { return NumRegs; } - - void init(LiveIntervalUnion::Allocator &, unsigned NRegs); - - void clear(); - - LiveIntervalUnion& operator[](unsigned PhysReg) { - assert(PhysReg < NumRegs && "physReg out of bounds"); - return Array[PhysReg]; - } - }; - - LiveUnionArray PhysReg2LiveUnion; - - // Current queries, one per physreg. They must be reinitialized each time we - // query on a new live virtual register. - OwningArrayPtr<LiveIntervalUnion::Query> Queries; - protected: const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; VirtRegMap *VRM; LiveIntervals *LIS; + LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; - RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} + RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {} virtual ~RegAllocBase() {} // A RegAlloc pass should call this before allocatePhysRegs. - void init(VirtRegMap &vrm, LiveIntervals &lis); - - // Get an initialized query to check interferences between lvr and preg. Note - // that Query::init must be called at least once for each physical register - // before querying a new live virtual register. This ties Queries and - // PhysReg2LiveUnion together. - LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { - Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]); - return Queries[PhysReg]; - } - - // Get direct access to the underlying LiveIntervalUnion for PhysReg. - LiveIntervalUnion &getLiveUnion(unsigned PhysReg) { - return PhysReg2LiveUnion[PhysReg]; - } - - // Invalidate all cached information about virtual registers - live ranges may - // have changed. - void invalidateVirtRegs() { ++UserTag; } + void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat); // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. @@ -140,31 +92,6 @@ protected: virtual unsigned selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; - // A RegAlloc pass should call this when PassManager releases its memory. - virtual void releaseMemory(); - - // Helper for checking interference between a live virtual register and a - // physical register, including all its register aliases. If an interference - // exists, return the interfering register, which may be preg or an alias. - unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg); - - /// assign - Assign VirtReg to PhysReg. - /// This should not be called from selectOrSplit for the current register. - void assign(LiveInterval &VirtReg, unsigned PhysReg); - - /// unassign - Undo a previous assignment of VirtReg to PhysReg. - /// This can be invoked from selectOrSplit, but be careful to guarantee that - /// allocation is making progress. - void unassign(LiveInterval &VirtReg, unsigned PhysReg); - - /// addMBBLiveIns - Add physreg liveins to basic blocks. - void addMBBLiveIns(MachineFunction *); - -#ifndef NDEBUG - // Verify each LiveIntervalUnion. - void verify(); -#endif - // Use this group name for NamedRegionTimer. 
static const char *TimerGroupName; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 77ee314..3a03807 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,11 +13,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "AllocationOrder.h" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" +#include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" @@ -64,10 +65,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // context MachineFunction *MF; - // analyses - LiveStacks *LS; - RenderMachineFunction *RMF; - // state std::auto_ptr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, @@ -118,9 +115,6 @@ public: bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl<LiveInterval*> &SplitVRegs); - void spillReg(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); - static char ID; }; @@ -139,7 +133,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -147,6 +141,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); @@ -159,41 +154,15 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); - DEBUG(AU.addRequired<RenderMachineFunction>()); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); MachineFunctionPass::getAnalysisUsage(AU); } void RABasic::releaseMemory() { SpillerInstance.reset(0); - RegAllocBase::releaseMemory(); } -// Helper for spillInterferences() that spills all interfering vregs currently -// assigned to this physical register. -void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { - LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); - assert(Q.seenAllInterferences() && "need collectInterferences()"); - const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs(); - - for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(), - E = PendingSpills.end(); I != E; ++I) { - LiveInterval &SpilledVReg = **I; - DEBUG(dbgs() << "extracting from " << - TRI->getName(PhysReg) << " " << SpilledVReg << '\n'); - - // Deallocate the interfering vreg by removing it from the union. - // A LiveInterval instance may not be in a union during modification! - unassign(SpilledVReg, PhysReg); - - // Spill the extracted interval. - LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); - spiller().spill(LRE); - } - // After extracting segments, the query's results are invalid. But keep the - // contents valid until we're done accessing pendingSpills. 
- Q.clear(); -} // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are @@ -202,22 +171,41 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. - unsigned NumInterferences = 0; + SmallVector<LiveInterval*, 8> Intfs; + // Collect interferences assigned to any alias of the physical register. - for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { - LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); - NumInterferences += QAlias.collectInterferingVRegs(); - if (QAlias.seenUnspillableVReg()) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) return false; + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + return false; + Intfs.push_back(Intf); } } DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << " interferences with " << VirtReg << "\n"); - assert(NumInterferences > 0 && "expect interference"); + assert(!Intfs.empty() && "expected interference"); // Spill each interfering vreg allocated to PhysReg or an alias. - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - spillReg(VirtReg, *AliasI, SplitVRegs); + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval &Spill = *Intfs[i]; + + // Skip duplicates. + if (!VRM->hasPhys(Spill.reg)) + continue; + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + Matrix->unassign(Spill); + + // Spill the extracted interval. + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM); + spiller().spill(LRE); + } return true; } @@ -235,49 +223,36 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { - // Check for register mask interference. When live ranges cross calls, the - // set of usable registers is reduced to the callee-saved ones. - bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs); - // Populate a list of physical register spill candidates. SmallVector<unsigned, 8> PhysRegSpillCands; // Check for an available register in this class. - ArrayRef<unsigned> Order = - RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg)); - for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E; - ++I) { - unsigned PhysReg = *I; - - // If PhysReg is clobbered by a register mask, it isn't useful for - // allocation or spilling. - if (CrossRegMasks && !UsableRegs.test(PhysReg)) - continue; - - // Check interference and as a side effect, intialize queries for this - // VirtReg and its aliases. - unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); - if (interfReg == 0) { - // Found an available register. + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + while (unsigned PhysReg = Order.next()) { + // Check for interference in PhysReg + switch (Matrix->checkInterference(VirtReg, PhysReg)) { + case LiveRegMatrix::IK_Free: + // PhysReg is available, allocate it. 
return PhysReg; - } - LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg); - IntfQ.collectInterferingVRegs(1); - LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front(); - // The current VirtReg must either be spillable, or one of its interferences - // must have less spill weight. - if (interferingVirtReg->weight < VirtReg.weight ) { + case LiveRegMatrix::IK_VirtReg: + // Only virtual registers in the way, we may be able to spill them. PhysRegSpillCands.push_back(PhysReg); + continue; + + default: + // RegMask or RegUnit interference. + continue; } } + // Try to spill another interfering reg with less spill weight. for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { - - if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) + continue; - assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 && + assert(!Matrix->checkInterference(VirtReg, *PhysRegI) && "Interference after spill."); // Tell the caller to allocate to this newly freed physical register. return *PhysRegI; @@ -287,7 +262,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -301,53 +276,17 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { << ((Value*)mf.getFunction())->getName() << '\n'); MF = &mf; - DEBUG(RMF = &getAnalysis<RenderMachineFunction>()); - - RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + RegAllocBase::init(getAnalysis<VirtRegMap>(), + getAnalysis<LiveIntervals>(), + getAnalysis<LiveRegMatrix>()); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); - addMBBLiveIns(MF); - // Diagnostic output before rewriting DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); - // optional HTML output - DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM)); - - // FIXME: Verification currently must run before VirtRegRewriter. We should - // make the rewriter a separate pass and override verifyAnalysis instead. When - // that happens, verification naturally falls under VerifyMachineCode. -#ifndef NDEBUG - if (VerifyEnabled) { - // Verify accuracy of LiveIntervals. The standard machine code verifier - // ensures that each LiveIntervals covers all uses of the virtual reg. - - // FIXME: MachineVerifier is badly broken when using the standard - // spiller. Always use -spiller=inline with -verify-regalloc. Even with the - // inline spiller, some tests fail to verify because the coalescer does not - // always generate verifiable code. - MF->verify(this, "In RABasic::verify"); - - // Verify that LiveIntervals are partitioned into unions and disjoint within - // the unions. - verify(); - } -#endif // !NDEBUG - - // Run rewriter - VRM->rewrite(LIS->getSlotIndexes()); - - // Write out new DBG_VALUE instructions. - getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); - - // All machine operands and other references to virtual registers have been - // replaced. Remove the virtual registers and release all the transient data. 
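// Aside: the new RABasic::selectOrSplit loop distinguishes three outcomes per
// candidate physreg: free (take it), blocked only by virtual registers (a
// spill candidate), or blocked by fixed regunit/regmask interference
// (unusable). A compressed model of that control flow; the enum mirrors
// LiveRegMatrix's result kinds and CheckInterference is an assumed callback,
// not the real API:
#include <vector>

enum InterferenceKind { IK_Free, IK_VirtReg, IK_RegUnit, IK_RegMask };

unsigned selectCandidate(const std::vector<unsigned> &Order,
                         InterferenceKind (*CheckInterference)(unsigned),
                         std::vector<unsigned> &SpillCands) {
  for (unsigned i = 0, e = Order.size(); i != e; ++i) {
    unsigned PhysReg = Order[i];
    switch (CheckInterference(PhysReg)) {
    case IK_Free:
      return PhysReg; // available: allocate immediately
    case IK_VirtReg:
      SpillCands.push_back(PhysReg); // maybe freed by spilling
      continue;
    default:
      continue; // fixed regunit or regmask interference: unusable
    }
  }
  return 0; // caller falls back to spilling or splitting
}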
- VRM->clearAllVirt(); - MRI->clearVirtRegs(); releaseMemory(); - return true; } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index e09b7f8..6b3a48e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "RegisterClassInfo.h" #include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" @@ -77,7 +77,7 @@ namespace { explicit LiveReg(unsigned v) : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} - unsigned getSparseSetKey() const { + unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; @@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { /// its virtual register, and it is guaranteed to be a block-local register. /// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { - // Check for non-debug uses or defs following MO. - // This is the most likely way to fail - fast path it. - MachineOperand *Next = &MO; - while ((Next = Next->getNextOperandForReg())) - if (!Next->isDebug()) - return false; - // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. - return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. @@ -354,8 +350,8 @@ void RAFast::usePhysReg(MachineOperand &MO) { } // Maybe a superregister is reserved? - for (const uint16_t *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; switch (PhysRegState[Alias]) { case regDisabled: break; @@ -408,8 +404,8 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; - for (const uint16_t *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -456,8 +452,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { // This is a disabled register, add up cost of aliases. DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; - for (const uint16_t *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; if (UsedInInstr.test(Alias)) return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { @@ -659,9 +655,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, // Return true if the operand kills its register. 
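// Aside: the RAFast hunks below repeatedly convert walks over a
// null-terminated getAliasSet()/getOverlaps() array into MCRegAliasIterator,
// whose boolean argument controls whether Reg itself is visited. A toy
// equivalent of that iteration, with the alias table as a plain map
// (hypothetical data layout, not the real target descriptor tables):
#include <map>
#include <vector>

typedef std::map<unsigned, std::vector<unsigned> > AliasTable;

void forEachAlias(unsigned Reg, const AliasTable &Aliases, bool IncludeSelf,
                  void (*Fn)(unsigned)) {
  if (IncludeSelf)
    Fn(Reg); // MCRegAliasIterator(Reg, TRI, /*IncludeSelf=*/true)
  AliasTable::const_iterator It = Aliases.find(Reg);
  if (It == Aliases.end())
    return;
  for (unsigned i = 0, e = It->second.size(); i != e; ++i)
    Fn(It->second[i]); // every register overlapping Reg
}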
bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { MachineOperand &MO = MI->getOperand(OpNum); + bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); - return MO.isKill() || MO.isDead(); + return MO.isKill() || Dead; } // Handle subregister index. @@ -674,7 +671,13 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { MI->addRegisterKilled(PhysReg, TRI, true); return true; } - return MO.isDead(); + + // A <def,read-undef> of a sub-register requires an implicit def of the full + // register. + if (MO.isDef() && MO.isUndef()) + MI->addRegisterDefined(PhysReg, TRI); + + return Dead; } // Handle special instruction operand like early clobbers and tied ops when @@ -704,13 +707,10 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - UsedInInstr.set(Reg); - if (ThroughRegs.count(PhysRegState[Reg])) - definePhysReg(MI, Reg, regFree); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - UsedInInstr.set(*AS); - if (ThroughRegs.count(PhysRegState[*AS])) - definePhysReg(MI, *AS, regFree); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + UsedInInstr.set(*AI); + if (ThroughRegs.count(PhysRegState[*AI])) + definePhysReg(MI, *AI, regFree); } } @@ -1029,9 +1029,8 @@ void RAFast::AllocateBasicBlock() { if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; - UsedInInstr.set(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UsedInInstr.set(*AI); } } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 3f2a617..6ac5428 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,6 +16,7 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" +#include "LiveRegMatrix.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -73,7 +74,6 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; - LiveStacks *LS; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; EdgeBundles *Bundles; @@ -168,19 +168,6 @@ class RAGreedy : public MachineFunctionPass, } }; - // Register mask interference. The current VirtReg is checked for register - // mask interference on entry to selectOrSplit(). If there is no - // interference, UsableRegs is left empty. If there is interference, - // UsableRegs has a bit mask of registers that can be used without register - // mask interference. - BitVector UsableRegs; - - /// clobberedByRegMask - Returns true if PhysReg is not directly usable - /// because of register mask clobbers. - bool clobberedByRegMask(unsigned PhysReg) const { - return !UsableRegs.empty() && !UsableRegs.test(PhysReg); - } - // splitting state. 
std::auto_ptr<SplitAnalysis> SA; std::auto_ptr<SplitEditor> SE; @@ -286,6 +273,8 @@ private: SmallVectorImpl<LiveInterval*>&); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); + unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); unsigned trySplit(LiveInterval&, AllocationOrder&, @@ -327,6 +316,7 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); } @@ -336,6 +326,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); @@ -349,6 +340,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); AU.addRequired<EdgeBundles>(); AU.addRequired<SpillPlacement>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -360,8 +353,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { //===----------------------------------------------------------------------===// bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { - if (unsigned PhysReg = VRM->getPhys(VirtReg)) { - unassign(LIS->getInterval(VirtReg), PhysReg); + if (VRM->hasPhys(VirtReg)) { + Matrix->unassign(LIS->getInterval(VirtReg)); return true; } // Unassigned virtreg is probably in the priority queue. @@ -370,13 +363,12 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { } void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (!PhysReg) + if (!VRM->hasPhys(VirtReg)) return; // Register is assigned, put it back on the queue for reassignment. LiveInterval &LI = LIS->getInterval(VirtReg); - unassign(LI, PhysReg); + Matrix->unassign(LI); enqueue(&LI); } @@ -398,7 +390,6 @@ void RAGreedy::releaseMemory() { SpillerInstance.reset(0); ExtraRegInfo.clear(); GlobalCand.clear(); - RegAllocBase::releaseMemory(); } void RAGreedy::enqueue(LiveInterval *LI) { @@ -450,12 +441,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { Order.rewind(); unsigned PhysReg; - while ((PhysReg = Order.next())) { - if (clobberedByRegMask(PhysReg)) - continue; - if (!checkPhysRegInterference(VirtReg, PhysReg)) + while ((PhysReg = Order.next())) + if (!Matrix->checkInterference(VirtReg, PhysReg)) break; - } if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -464,7 +452,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. 
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) - if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) { + if (Order.isHint(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); EvictionCost MaxCost(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -527,6 +515,10 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, bool IsHint, EvictionCost &MaxCost) { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) + return false; + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -539,8 +531,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. if (Q.collectInterferingVRegs(10) >= 10) return false; @@ -548,15 +540,21 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) - return false; + assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + "Only expecting virtual register interference from query"); // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) return false; // Once a live range becomes small enough, it is urgent that we find a // register for it. This is indicated by an infinite spill weight. These // urgent live ranges get to evict almost anything. - bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable(); + // + // Also allow urgent evictions of unspillable ranges from a strictly + // larger allocation order. + bool Urgent = !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); // Only evict older cascades or live ranges without a cascade. unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; if (Cascade <= IntfCascade) { @@ -597,19 +595,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + + // Collect all interfering virtregs first. 
+ SmallVector<LiveInterval*, 8> Intfs; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || - VirtReg.isSpillable() < Intf->isSpillable()) && - "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; - ++NumEvicted; - NewVRegs.push_back(Intf); - } + ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); + Intfs.append(IVR.begin(), IVR.end()); + } + + // Evict them second. This will invalidate the queries. + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval *Intf = Intfs[i]; + // The same VirtReg may be present in multiple RegUnits. Skip duplicates. + if (!VRM->hasPhys(Intf->reg)) + continue; + Matrix->unassign(*Intf); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf); } } @@ -636,8 +644,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { - if (clobberedByRegMask(PhysReg)) - continue; if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1183,7 +1189,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1231,7 +1237,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1265,6 +1271,65 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } + +//===----------------------------------------------------------------------===// +// Per-Instruction Splitting +//===----------------------------------------------------------------------===// + +/// tryInstructionSplit - Split a live range around individual instructions. +/// This is normally not worthwhile since the spiller is doing essentially the +/// same thing. However, when the live range is in a constrained register +/// class, it may help to insert copies such that parts of the live range can +/// be moved to a larger register class. +/// +/// This is similar to spilling to a larger register class. +unsigned +RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + // There is no point to this if there are no larger sub-classes. 
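// Aside: the evictInterference rewrite above is a collect-then-mutate
// pattern: snapshot all interfering ranges first, then unassign them, because
// unassigning invalidates the per-unit interference queries being iterated,
// and the same interval can appear under several register units. A distilled
// sketch (ToyInterval and the callbacks are hypothetical):
#include <vector>

struct ToyInterval { unsigned Reg; };

void evictAll(const std::vector<std::vector<ToyInterval*> > &UnitQueries,
              std::vector<ToyInterval*> &Requeue,
              bool (*IsAssigned)(unsigned),
              void (*Unassign)(ToyInterval*)) {
  // Phase 1: collect. Queries stay valid while we only read them.
  std::vector<ToyInterval*> Intfs;
  for (unsigned u = 0, e = UnitQueries.size(); u != e; ++u)
    Intfs.insert(Intfs.end(), UnitQueries[u].begin(), UnitQueries[u].end());

  // Phase 2: mutate. Duplicates were already unassigned, so skip them.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    ToyInterval *I = Intfs[i];
    if (!IsAssigned(I->Reg))
      continue;
    Unassign(I);
    Requeue.push_back(I); // put it back on the allocation queue
  }
}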
+ if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + return 0; + + // Always enable split spill mode, since we're effectively spilling to a + // register. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitEditor::SM_Size); + + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); + if (Uses.size() <= 1) + return 0; + + DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); + + // Split around every non-copy instruction. + for (unsigned i = 0; i != Uses.size(); ++i) { + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) + if (MI->isFullCopy()) { + DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + continue; + } + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); + SE->useIntv(SegStart, SegStop); + } + + if (LREdit.empty()) { + DEBUG(dbgs() << "All uses were copies.\n"); + return 0; + } + + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Assign all new registers to RS_Spill. This was the last chance. + setStage(LREdit.begin(), LREdit.end(), RS_Spill); + return 0; +} + + //===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// @@ -1291,9 +1356,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. - for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI) - .checkInterference()) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) + .checkInterference()) continue; // We know that VirtReg is a continuous interval from FirstInstr to @@ -1303,7 +1368,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = + Matrix->getLiveUnions()[*Units] .find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1323,6 +1389,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; } } + + // Add fixed interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveInterval &LI = LIS->getRegUnit(*Units); + LiveInterval::const_iterator I = LI.find(StartIdx); + LiveInterval::const_iterator E = LI.end(); + + // Same loop as above. Mark any overlapped gaps as HUGE_VALF. + for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { + while (Uses[Gap+1].getBoundaryIndex() < I->start) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = HUGE_VALF; + if (Uses[Gap+1].getBaseIndex() >= I->end) + break; + } + if (Gap == NumGaps) + break; + } + } } /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only @@ -1355,7 +1445,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // If VirtReg is live across any register mask operands, compute a list of // gaps with register masks. 
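// Aside: calcGapWeights above walks sorted interference segments against the
// gaps between a live range's use points, raising each overlapped gap's
// weight (fixed regunit interference uses HUGE_VALF in the patch). A
// distilled version of that double walk; Seg and the plain-integer slot
// indexes are toy stand-ins for LLVM's SlotIndex machinery:
#include <algorithm>
#include <vector>

struct Seg { unsigned Start, End; float Weight; }; // half-open [Start, End)

void markGaps(const std::vector<unsigned> &Uses, // sorted use points
              const std::vector<Seg> &Intf,      // sorted, disjoint segments
              std::vector<float> &GapWeight) {   // one entry per gap
  if (Uses.size() < 2)
    return; // no gaps to weight
  const unsigned NumGaps = Uses.size() - 1;
  GapWeight.assign(NumGaps, 0.0f);
  unsigned Gap = 0;
  for (unsigned s = 0, e = Intf.size(); s != e; ++s) {
    // Skip gaps that end before this segment begins.
    while (Gap != NumGaps && Uses[Gap + 1] <= Intf[s].Start)
      ++Gap;
    if (Gap == NumGaps)
      break;
    // Raise the weight of every gap the segment overlaps.
    for (unsigned G = Gap; G != NumGaps && Uses[G] < Intf[s].End; ++G)
      GapWeight[G] = std::max(GapWeight[G], Intf[s].Weight);
  }
}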
SmallVector<unsigned, 8> RegMaskGaps; - if (!UsableRegs.empty()) { + if (Matrix->checkRegMaskInterference(VirtReg)) { // Get regmask slots for the whole block. ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); DEBUG(dbgs() << RMS.size() << " regmasks in block:"); @@ -1417,7 +1507,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, calcGapWeights(PhysReg, GapWeight); // Remove any gaps with regmask clobbers. - if (clobberedByRegMask(PhysReg)) + if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) GapWeight[RegMaskGaps[i]] = HUGE_VALF; @@ -1512,7 +1602,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1561,7 +1651,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); SA->analyze(&VirtReg); - return tryLocalSplit(VirtReg, Order, NewVRegs); + unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + return tryInstructionSplit(VirtReg, Order, NewVRegs); } NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); @@ -1574,7 +1667,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // an assertion when the coalescer is fixed. if (SA->didRepairRange()) { // VirtReg has changed, so all cached queries are invalid. - invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } @@ -1599,11 +1692,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { - // Check if VirtReg is live across any calls. - UsableRegs.clear(); - if (LIS->checkRegMaskInterference(VirtReg, UsableRegs)) - DEBUG(dbgs() << "Live across regmasks.\n"); - // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1644,7 +1732,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. 
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -1665,7 +1753,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); - RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + RegAllocBase::init(getAnalysis<VirtRegMap>(), + getAnalysis<LiveIntervals>(), + getAnalysis<LiveRegMatrix>()); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); @@ -1679,30 +1769,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI); + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); - addMBBLiveIns(MF); - LIS->addKillFlags(); - - // Run rewriter - { - NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); - VRM->rewrite(Indexes); - } - - // Write out new DBG_VALUE instructions. - { - NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled); - DebugVars->emitDebugValues(VRM); - } - - // All machine operands and other references to virtual registers have been - // replaced. Remove the virtual registers and release all the transient data. - VRM->clearAllVirt(); - MRI->clearVirtRegs(); releaseMemory(); - return true; } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index a284614..d0db26b 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,6 @@ #define DEBUG_TYPE "regalloc" -#include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" #include "RegisterCoalescer.h" @@ -98,7 +97,6 @@ public: initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } /// Return the pass name. @@ -134,7 +132,6 @@ private: const TargetInstrInfo *tii; const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; - RenderMachineFunction *rmf; std::auto_ptr<Spiller> spiller; LiveIntervals *lis; @@ -196,7 +193,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector<const LiveInterval*> LIVector; - ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots(); + LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -205,12 +202,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, RegSet pregs; // Collect the set of preg intervals, record that they're used in the MF. 
- for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { - pregs.insert(itr->first); - mri->setPhysRegUsed(itr->first); - } + for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) { + if (mri->def_empty(Reg)) + continue; + pregs.insert(Reg); + mri->setPhysRegUsed(Reg); } BitVector reservedRegs = tri->getReservedRegs(*mf); @@ -220,7 +216,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, vregItr != vregEnd; ++vregItr) { unsigned vreg = *vregItr; const TargetRegisterClass *trc = mri->getRegClass(vreg); - const LiveInterval *vregLI = &lis->getInterval(vreg); + LiveInterval *vregLI = &LIS->getInterval(vreg); + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); // Compute an initial allowed set for the current vreg. typedef std::vector<unsigned> VRAllowed; @@ -228,80 +228,26 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; - if (!reservedRegs.test(preg)) { - vrAllowed.push_back(preg); - } - } - - RegSet overlappingPRegs; - - // Record physical registers whose ranges overlap. - for (RegSet::const_iterator pregItr = pregs.begin(), - pregEnd = pregs.end(); - pregItr != pregEnd; ++pregItr) { - unsigned preg = *pregItr; - const LiveInterval *pregLI = &lis->getInterval(preg); - - if (pregLI->empty()) { + if (reservedRegs.test(preg)) continue; - } - if (vregLI->overlaps(*pregLI)) - overlappingPRegs.insert(preg); - } + // vregLI crosses a regmask operand that clobbers preg. + if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg)) + continue; - // Record any overlaps with regmask operands. - BitVector regMaskOverlaps(tri->getNumRegs()); - for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(), - rmEnd = regMaskSlots.end(); - rmItr != rmEnd; ++rmItr) { - SlotIndex rmIdx = *rmItr; - if (vregLI->liveAt(rmIdx)) { - MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); - const uint32_t* regMask = 0; - for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), - mopEnd = rmMI->operands_end(); - mopItr != mopEnd; ++mopItr) { - if (mopItr->isRegMask()) { - regMask = mopItr->getRegMask(); - break; - } + // vregLI overlaps fixed regunit interference. + bool Interference = false; + for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) { + if (vregLI->overlaps(LIS->getRegUnit(*Units))) { + Interference = true; + break; } - assert(regMask != 0 && "Couldn't find register mask."); - regMaskOverlaps.setBitsNotInMask(regMask); } - } + if (Interference) + continue; - for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { - if (regMaskOverlaps.test(preg)) - overlappingPRegs.insert(preg); - } - - for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), - pregEnd = overlappingPRegs.end(); - pregItr != pregEnd; ++pregItr) { - unsigned preg = *pregItr; - - // Remove the register from the allowed set. - VRAllowed::iterator eraseItr = - std::find(vrAllowed.begin(), vrAllowed.end(), preg); - - if (eraseItr != vrAllowed.end()) { - vrAllowed.erase(eraseItr); - } - - // Also remove any aliases. 
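// Aside: the rewritten PBQP allowed-set computation above filters the raw
// allocation order through three tests: reserved registers, regmask clobbers
// from calls the vreg lives across, and fixed register-unit interference. A
// distilled sketch of that filter; the three predicates are assumed
// callbacks standing in for the reserved-register BitVector, the regmask
// overlap BitVector, and the regunit overlap loop:
#include <vector>

std::vector<unsigned>
computeAllowedSet(const std::vector<unsigned> &RawOrder,
                  bool (*IsReserved)(unsigned),
                  bool (*ClobberedByRegMask)(unsigned),
                  bool (*OverlapsFixedUnit)(unsigned)) {
  std::vector<unsigned> Allowed;
  for (unsigned i = 0, e = RawOrder.size(); i != e; ++i) {
    unsigned PReg = RawOrder[i];
    if (IsReserved(PReg))
      continue; // never allocatable in this function
    if (ClobberedByRegMask(PReg))
      continue; // a call inside the live range clobbers PReg
    if (OverlapsFixedUnit(PReg))
      continue; // fixed interference on one of PReg's register units
    Allowed.push_back(PReg); // PReg is usable for this virtual register
  }
  return Allowed;
}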
- const uint16_t *aliasItr = tri->getAliasSet(preg); - if (aliasItr != 0) { - for (; *aliasItr != 0; ++aliasItr) { - VRAllowed::iterator eraseItr = - std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr); - - if (eraseItr != vrAllowed.end()) { - vrAllowed.erase(eraseItr); - } - } - } + // preg is usable for this virtual register. + vrAllowed.push_back(preg); } // Construct the node. @@ -379,7 +325,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); - CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo()); + CoalescerPair cp(*tm.getRegisterInfo()); // Scan the machine function and add a coalescing cost whenever CoalescerPair // gives the Ok. @@ -498,21 +444,17 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); au.addRequired<VirtRegMap>(); - au.addRequired<RenderMachineFunction>(); MachineFunctionPass::getAnalysisUsage(au); } void RegAllocPBQP::findVRegIntervalsToAlloc() { // Iterate over all live ranges. - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - - // Ignore physical ones. - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) + for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (mri->reg_nodbg_empty(Reg)) continue; - - LiveInterval *li = itr->second; + LiveInterval *li = &lis->getInterval(Reg); // If this live interval is non-empty we will use pbqp to allocate it. // Empty intervals we allocate in a simple post-processing stage in @@ -544,16 +486,17 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); - DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); + DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> " + << tri->getName(preg) << "\n"); assert(preg != 0 && "Invalid preg selected."); vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); SmallVector<LiveInterval*, 8> newSpills; - LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); + LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); - DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " + DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: " << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to @@ -561,7 +504,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << (*itr)->reg << " "); + DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); vregsToAlloc.insert((*itr)->reg); } @@ -579,9 +522,6 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, void RegAllocPBQP::finalizeAlloc() const { - typedef LiveIntervals::iterator LIIterator; - typedef LiveInterval::Ranges::const_iterator LRIterator; - // First allocate registers for the empty intervals. for (RegSet::const_iterator itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); @@ -597,51 +537,6 @@ void RegAllocPBQP::finalizeAlloc() const { vrm->assignVirt2Phys(li->reg, physReg); } - - // Finally iterate over the basic blocks to compute and set the live-in sets. 
- SmallVector<MachineBasicBlock*, 8> liveInMBBs; - MachineBasicBlock *entryMBB = &*mf->begin(); - - for (LIIterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = 0; - - // Get the physical register for this interval - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { - reg = li->reg; - } else if (vrm->isAssignedReg(li->reg)) { - reg = vrm->getPhys(li->reg); - } else { - // Ranges which are assigned a stack slot only are ignored. - continue; - } - - if (reg == 0) { - // Filter out zero regs - they're for intervals that were spilled. - continue; - } - - // Iterate over the ranges of the current interval... - for (LRIterator lrItr = li->begin(), lrEnd = li->end(); - lrItr != lrEnd; ++lrItr) { - - // Find the set of basic blocks which this range is live into... - if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { - // And add the physreg for this interval to their live-in sets. - for (unsigned i = 0; i != liveInMBBs.size(); ++i) { - if (liveInMBBs[i] != entryMBB) { - if (!liveInMBBs[i]->isLiveIn(reg)) { - liveInMBBs[i]->addLiveIn(reg); - } - } - } - liveInMBBs.clear(); - } - } - } - } bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { @@ -655,7 +550,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); - rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -719,22 +613,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Finalise allocation, allocate empty ranges. finalizeAlloc(); - - rmf->renderMachineFunction("After PBQP register allocation.", vrm); - vregsToAlloc.clear(); emptyIntervalVRegs.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); - // Run rewriter - vrm->rewrite(lis->getSlotIndexes()); - - // All machine operands and other references to virtual registers have been - // replaced. Remove the virtual registers. - vrm->clearAllVirt(); - mri->clearVirtRegs(); - return true; } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 17165fa..652bc30 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -15,8 +15,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -50,9 +50,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { CSRNum.clear(); CSRNum.resize(TRI->getNumRegs(), 0); for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (const uint16_t *AS = TRI->getOverlaps(Reg); - unsigned Alias = *AS; ++AS) - CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... 
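The same MCRegAliasIterator replacement appears here; passing true as the third constructor argument makes the iteration include Reg itself, matching the old getOverlaps semantics. A standalone sketch of the CSRNum computation (assuming CSR is the target's null-terminated callee-saved list, as in the function above):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch (not from the patch): map every register overlapping a
// callee-saved register to that CSR's 1-based index; 0 means "no CSR
// overlaps this register".
static void computeCSRNum(const uint16_t *CSR,
                          const TargetRegisterInfo *TRI,
                          SmallVectorImpl<uint8_t> &CSRNum) {
  CSRNum.assign(TRI->getNumRegs(), 0);
  for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
      CSRNum[*AI] = N + 1;
}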
Update = true; } CalleeSaved = CSR; diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h deleted file mode 100644 index 400e1f4..0000000 --- a/lib/CodeGen/RegisterClassInfo.h +++ /dev/null @@ -1,132 +0,0 @@ -//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the RegisterClassInfo class which provides dynamic -// information about target register classes. Callee saved and reserved -// registers depends on calling conventions and other dynamic information, so -// some things cannot be determined statically. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H -#define LLVM_CODEGEN_REGISTERCLASSINFO_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - -class RegisterClassInfo { - struct RCInfo { - unsigned Tag; - unsigned NumRegs; - bool ProperSubClass; - OwningArrayPtr<unsigned> Order; - - RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {} - operator ArrayRef<unsigned>() const { - return makeArrayRef(Order.get(), NumRegs); - } - }; - - // Brief cached information for each register class. - OwningArrayPtr<RCInfo> RegClass; - - // Tag changes whenever cached information needs to be recomputed. An RCInfo - // entry is valid when its tag matches. - unsigned Tag; - - const MachineFunction *MF; - const TargetRegisterInfo *TRI; - - // Callee saved registers of last MF. Assumed to be valid until the next - // runOnFunction() call. - const uint16_t *CalleeSaved; - - // Map register number to CalleeSaved index + 1; - SmallVector<uint8_t, 4> CSRNum; - - // Reserved registers in the current MF. - BitVector Reserved; - - // Compute all information about RC. - void compute(const TargetRegisterClass *RC) const; - - // Return an up-to-date RCInfo for RC. - const RCInfo &get(const TargetRegisterClass *RC) const { - const RCInfo &RCI = RegClass[RC->getID()]; - if (Tag != RCI.Tag) - compute(RC); - return RCI; - } - -public: - RegisterClassInfo(); - - /// runOnFunction - Prepare to answer questions about MF. This must be called - /// before any other methods are used. - void runOnMachineFunction(const MachineFunction &MF); - - /// getNumAllocatableRegs - Returns the number of actually allocatable - /// registers in RC in the current function. - unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const { - return get(RC).NumRegs; - } - - /// getOrder - Returns the preferred allocation order for RC. The order - /// contains no reserved registers, and registers that alias callee saved - /// registers come last. - ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const { - return get(RC); - } - - /// isProperSubClass - Returns true if RC has a legal super-class with more - /// allocatable registers. - /// - /// Register classes like GR32_NOSP are not proper sub-classes because %esp - /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb - /// mode because the GPR super-class is not legal. 
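This header is deleted from lib/CodeGen because it now lives in include/llvm/CodeGen, as the updated #include lines in the hunks above show. For reference, a sketch of how allocator code consumes the interface reproduced below (the helper is hypothetical, not from the patch):

#include "llvm/CodeGen/RegisterClassInfo.h"
using namespace llvm;

// Sketch (not from the patch): walk the preferred allocation order for
// RC. Reserved registers are already filtered out, and registers that
// alias callee-saved registers come last.
static unsigned pickFirstPReg(const RegisterClassInfo &RCI,
                              const TargetRegisterClass *RC) {
  ArrayRef<unsigned> Order = RCI.getOrder(RC);
  return Order.empty() ? 0 : Order[0];
}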
- bool isProperSubClass(const TargetRegisterClass *RC) const { - return get(RC).ProperSubClass; - } - - /// getLastCalleeSavedAlias - Returns the last callee saved register that - /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR. - unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - if (unsigned N = CSRNum[PhysReg]) - return CalleeSaved[N-1]; - return 0; - } - - /// isReserved - Returns true when PhysReg is a reserved register. - /// - /// Reserved registers may belong to an allocatable register class, but the - /// target has explicitly requested that they are not used. - /// - bool isReserved(unsigned PhysReg) const { - return Reserved.test(PhysReg); - } - - /// isAllocatable - Returns true when PhysReg belongs to an allocatable - /// register class and it hasn't been reserved. - /// - /// Allocatable registers may show up in the allocation order of some virtual - /// register, so a register allocator needs to track its liveness and - /// availability. - bool isAllocatable(unsigned PhysReg) const { - return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg); - } -}; -} // end namespace llvm - -#endif - diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 75f88ca..9906334 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,34 +16,35 @@ #define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" #include "LiveDebugVariables.h" -#include "RegisterClassInfo.h" #include "VirtRegMap.h" #include "llvm/Pass.h" #include "llvm/Value.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <cmath> using namespace llvm; @@ -53,8 +54,6 @@ STATISTIC(numCrossRCs , "Number of cross class joins performed"); STATISTIC(numCommutes , "Number of instruction commuting performed"); STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); -STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); -STATISTIC(numAborts , "Number of times interval joining 
aborted"); STATISTIC(NumInflated , "Number of register classes inflated"); static cl::opt<bool> @@ -63,22 +62,13 @@ EnableJoining("join-liveintervals", cl::init(true)); static cl::opt<bool> -DisableCrossClassJoin("disable-cross-class-join", - cl::desc("Avoid coalescing cross register class copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -EnablePhysicalJoin("join-physregs", - cl::desc("Join physical register copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden); namespace { - class RegisterCoalescer : public MachineFunctionPass { + class RegisterCoalescer : public MachineFunctionPass, + private LiveRangeEdit::Delegate { MachineFunction* MF; MachineRegisterInfo* MRI; const TargetMachine* TM; @@ -90,87 +80,83 @@ namespace { AliasAnalysis *AA; RegisterClassInfo RegClassInfo; - /// JoinedCopies - Keep track of copies eliminated due to coalescing. - /// - SmallPtrSet<MachineInstr*, 32> JoinedCopies; + /// WorkList - Copy instructions yet to be coalesced. + SmallVector<MachineInstr*, 8> WorkList; + + /// ErasedInstrs - Set of instruction pointers that have been erased, and + /// that may be present in WorkList. + SmallPtrSet<MachineInstr*, 8> ErasedInstrs; + + /// Dead instructions that are about to be deleted. + SmallVector<MachineInstr*, 8> DeadDefs; + + /// Virtual registers to be considered for register class inflation. + SmallVector<unsigned, 8> InflateRegs; - /// ReMatCopies - Keep track of copies eliminated due to remat. - /// - SmallPtrSet<MachineInstr*, 32> ReMatCopies; + /// Recursively eliminate dead defs in DeadDefs. + void eliminateDeadDefs(); - /// ReMatDefs - Keep track of definition instructions which have - /// been remat'ed. - SmallPtrSet<MachineInstr*, 8> ReMatDefs; + /// LiveRangeEdit callback. + void LRE_WillEraseInstruction(MachineInstr *MI); - /// joinIntervals - join compatible live intervals - void joinIntervals(); + /// joinAllIntervals - join compatible live intervals + void joinAllIntervals(); - /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting - /// copies that cannot yet be coalesced into the "TryAgain" list. - void CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector<MachineInstr*> &TryAgain); + /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting + /// copies that cannot yet be coalesced into WorkList. + void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, + /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after + /// position From. Return true if any progress was made. + bool copyCoalesceWorkList(unsigned From = 0); + + /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns /// true if the copy was successfully coalesced away. If it is not /// currently possible to coalesce this interval, but it may be possible if /// other things get coalesced, then it returns true by reference in /// 'Again'. - bool JoinCopy(MachineInstr *TheCopy, bool &Again); + bool joinCopy(MachineInstr *TheCopy, bool &Again); - /// JoinIntervals - Attempt to join these two intervals. On failure, this + /// joinIntervals - Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we /// can use this information below to update aliases. 
- bool JoinIntervals(CoalescerPair &CP); + bool joinIntervals(CoalescerPair &CP); + + /// Attempt joining with a reserved physreg. + bool joinReservedPhysReg(CoalescerPair &CP); - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If + /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single /// value number, eliminating a copy. - bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); + bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); - /// HasOtherReachingDefs - Return true if there are definitions of IntB + /// hasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. - bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, + bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo); - /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy. + /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy. /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. - bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); + bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); - /// ReMaterializeTrivialDef - If the source of a copy is defined by a + /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. - bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, - unsigned DstReg, MachineInstr *CopyMI); - - /// shouldJoinPhys - Return true if a physreg copy should be joined. - bool shouldJoinPhys(CoalescerPair &CP); - - /// isWinToJoinCrossClass - Return true if it's profitable to coalesce - /// two virtual registers from different register classes. - bool isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC); - - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and + bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, + MachineInstr *CopyMI); + + /// canJoinPhys - Return true if a physreg copy should be joined. + bool canJoinPhys(CoalescerPair &CP); + + /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. - void UpdateRegDefsUses(const CoalescerPair &CP); - - /// RemoveDeadDef - If a def of a live interval is now determined dead, - /// remove the val# it defines. If the live interval becomes empty, remove - /// it as well. - bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); - - /// markAsJoined - Remember that CopyMI has already been joined. - void markAsJoined(MachineInstr *CopyMI); + void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); /// eliminateUndefCopy - Handle copies of undef values. 
bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); @@ -233,7 +219,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, } bool CoalescerPair::setRegisters(const MachineInstr *MI) { - SrcReg = DstReg = SubIdx = 0; + SrcReg = DstReg = 0; + SrcIdx = DstIdx = 0; NewRC = 0; Flipped = CrossClass = false; @@ -271,39 +258,44 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { } } else { // Both registers are virtual. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); // Both registers have subreg indices. if (SrcSub && DstSub) { - // For now we only handle the case of identical indices in commensurate - // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg - // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. - if (SrcSub != DstSub) + // Copies between different sub-registers are never coalescable. + if (Src == Dst && SrcSub != DstSub) return false; - const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); - const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); - if (!TRI.getCommonSubClass(DstRC, SrcRC)) + + NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, + SrcIdx, DstIdx); + if (!NewRC) return false; - SrcSub = DstSub = 0; + } else if (DstSub) { + // SrcReg will be merged with a sub-register of DstReg. + SrcIdx = DstSub; + NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + } else if (SrcSub) { + // DstReg will be merged with a sub-register of SrcReg. + DstIdx = SrcSub; + NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); + } else { + // This is a straight copy without sub-registers. + NewRC = TRI.getCommonSubClass(DstRC, SrcRC); } - // There can be no SrcSub. - if (SrcSub) { + // The combined constraint may be impossible to satisfy. + if (!NewRC) + return false; + + // Prefer SrcReg to be a sub-register of DstReg. + // FIXME: Coalescer should support subregs symmetrically. + if (DstIdx && !SrcIdx) { std::swap(Src, Dst); - DstSub = SrcSub; - SrcSub = 0; - assert(!Flipped && "Unexpected flip"); - Flipped = true; + std::swap(SrcIdx, DstIdx); + Flipped = !Flipped; } - // Find the new register class. - const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); - const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); - if (DstSub) - NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); - else - NewRC = TRI.getCommonSubClass(DstRC, SrcRC); - if (!NewRC) - return false; CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants @@ -312,14 +304,14 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { "Cannot have a physical SubIdx"); SrcReg = Src; DstReg = Dst; - SubIdx = DstSub; return true; } bool CoalescerPair::flip() { - if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) return false; std::swap(SrcReg, DstReg); + std::swap(SrcIdx, DstIdx); Flipped = !Flipped; return true; } @@ -343,7 +335,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { if (!TargetRegisterInfo::isPhysicalRegister(Dst)) return false; - assert(!SubIdx && "Inconsistent CoalescerPair state."); + assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. 
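The rewritten setRegisters above handles four virtual-virtual cases instead of bailing out on mismatched sub-register indices. A condensed sketch of just the classification step, in free-function form for illustration (it omits the Src == Dst special case shown in the hunk):

#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch (not from the patch): returns the register class the coalesced
// vreg must satisfy, or 0 if the combined constraint is impossible.
// SrcIdx/DstIdx receive the sub-register positions of the original
// registers inside the merged register.
static const TargetRegisterClass *
classifyVirtCopy(const TargetRegisterInfo &TRI,
                 const TargetRegisterClass *SrcRC, unsigned SrcSub,
                 const TargetRegisterClass *DstRC, unsigned DstSub,
                 unsigned &SrcIdx, unsigned &DstIdx) {
  SrcIdx = DstIdx = 0;
  if (SrcSub && DstSub)   // Both sides use sub-registers.
    return TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
                                      SrcIdx, DstIdx);
  if (DstSub) {           // SrcReg merges into a sub-register of DstReg.
    SrcIdx = DstSub;
    return TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
  }
  if (SrcSub) {           // DstReg merges into a sub-register of SrcReg.
    DstIdx = SrcSub;
    return TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
  }
  return TRI.getCommonSubClass(DstRC, SrcRC); // Straight copy.
}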
if (DstSub) Dst = TRI.getSubReg(Dst, DstSub); @@ -357,7 +349,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (DstReg != Dst) return false; // Registers match, do the subregisters line up? - return compose(TRI, SubIdx, SrcSub) == DstSub; + return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub); } } @@ -375,19 +367,18 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { - /// Joined copies are not deleted immediately, but kept in JoinedCopies. - JoinedCopies.insert(CopyMI); +void RegisterCoalescer::eliminateDeadDefs() { + SmallVector<LiveInterval*, 8> NewRegs; + LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); +} - /// Mark all register operands of CopyMI as <undef> so they won't affect dead - /// code elimination. - for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), - E = CopyMI->operands_end(); I != E; ++I) - if (I->isReg()) - I->setIsUndef(true); +// Callback from eliminateDeadDefs(). +void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { + // MI may be in WorkList. Make sure we don't visit it. + ErasedInstrs.insert(MI); } -/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a copy from B, /// see if we can merge these two pieces of B into a single value number, @@ -402,12 +393,10 @@ void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { /// /// This returns true if an interval was modified. /// -bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // Bail if there is no dst interval - can happen when merging physical subreg - // operations. - if (!LIS->hasInterval(CP.getDstReg())) - return false; +bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + assert(!CP.isPartial() && "This doesn't work for partial copies."); + assert(!CP.isPhys() && "This doesn't work for physreg copies."); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -457,24 +446,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // IntB, we can merge them. if (ValLR+1 != BLR) return false; - // If a live interval is a physical register, conservatively check if any - // of its aliases is overlapping the live interval of the virtual register. - // If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) - if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { - DEBUG({ - dbgs() << "\t\tInterfere with alias "; - LIS->getInterval(*AS).print(dbgs(), TRI); - }); - return false; - } - } - - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), TRI); - }); + DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -487,33 +459,10 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // two value numbers. 
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!LIS->hasInterval(*SR)) - continue; - LiveInterval &SRLI = LIS->getInterval(*SR); - SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, - LIS->getVNInfoAllocator()))); - } - } - // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); + if (BValNo != ValLR->valno) IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), TRI); - dbgs() << "\n"; - }); + DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. @@ -525,8 +474,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // Rewrite the copy. If the copy instruction was killing the destination // register before the merge, find the last use and trim the live range. That // will also add the isKill marker. - CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), - *TRI); + CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); if (ALR->end == CopyIdx) LIS->shrinkToUses(&IntA); @@ -534,12 +482,17 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, return true; } -/// HasOtherReachingDefs - Return true if there are definitions of IntB +/// hasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. -bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, - LiveInterval &IntB, - VNInfo *AValNo, - VNInfo *BValNo) { +bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -559,7 +512,7 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, return false; } -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with /// IntA being the source and IntB being the dest, thus this defines a value /// number in IntB. If the source value number (in IntA) is defined by a /// commutable instruction and its other operand is coalesced to the copy dest @@ -582,18 +535,9 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, /// /// This returns true if an interval was modified. /// -bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // FIXME: For now, only eliminate the copy by commuting its def when the - // source register is a virtual register. We want to guard against cases - // where the copy is a back edge copy and commuting the def lengthen the - // live interval of the source register to the entire loop. 
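hasOtherReachingDefs, updated above, now bails out early via LIS->hasPHIKill and otherwise scans for overlapping segments with foreign value numbers. Its core test can be pictured with this simplified sketch (quadratic, unlike the real advancing scan, and purely illustrative):

#include "llvm/CodeGen/LiveInterval.h"
using namespace llvm;

// Sketch (not from the patch): for every segment of IntA carrying
// AValNo, any IntB value other than BValNo live across that segment
// means a foreign def could reach AValNo's uses after the merge.
static bool hasOtherReachingDefsSketch(LiveInterval &IntA,
                                       LiveInterval &IntB,
                                       VNInfo *AValNo, VNInfo *BValNo) {
  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
       AI != AE; ++AI) {
    if (AI->valno != AValNo)
      continue;
    for (LiveInterval::iterator BI = IntB.begin(), BE = IntB.end();
         BI != BE; ++BI) {
      if (BI->end <= AI->start || BI->start >= AI->end)
        continue; // Segments do not overlap.
      if (BI->valno != BValNo)
        return true;
    }
  }
  return false;
}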
- if (CP.isPhys() && CP.isFlipped()) - return false; - - // Bail if there is no dst interval. - if (!LIS->hasInterval(CP.getDstReg())) - return false; +bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { + assert (!CP.isPhys()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); @@ -613,10 +557,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + if (AValNo->isPHIDef() || AValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -647,17 +588,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Make sure there are no other definitions of IntB that would reach the // uses which the new definition can reach. - if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; - // Abort if the aliases of IntB.reg have values that are not simply the - // clobbers from the superreg. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) - if (LIS->hasInterval(*AS) && - HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) - return false; - // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. for (MachineRegisterInfo::use_nodbg_iterator UI = @@ -666,13 +599,14 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *UseMI = &*UI; SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end()) + if (ULR == IntA.end() || ULR->valno != AValNo) continue; - if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + // If this use is tied to a def, we can't rewrite the register. + if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) return false; } - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' << *DefMI); // At this point we have decided that it is legal to do this @@ -709,8 +643,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; ++UI; - if (JoinedCopies.count(UseMI)) - continue; if (UseMI->isDebugValue()) { // FIXME These don't have an instruction index. Not clear we have enough // info to decide whether to do this replacement or not. For now do it. @@ -721,6 +653,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; + // Kill flags are no longer accurate. They are recomputed after RA. 
+ UseMO.setIsKill(false); if (TargetRegisterInfo::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else @@ -742,7 +676,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - markAsJoined(UseMI); + ErasedInstrs.insert(UseMI); + LIS->RemoveMachineInstrFromMaps(UseMI); + UseMI->eraseFromParent(); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition @@ -762,12 +698,11 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return true; } -/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. -bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, - bool preserveSrcInt, - unsigned DstReg, - MachineInstr *CopyMI) { +bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, + unsigned DstReg, + MachineInstr *CopyMI) { SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); @@ -792,7 +727,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { if (MRI->getRegClass(DstReg) != RC) return false; @@ -838,23 +773,21 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { - unsigned reg = NewMIImplDefs[i]; - LiveInterval &li = LIS->getInterval(reg); - VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), - LIS->getVNInfoAllocator()); - LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); - li.addRange(lr); + unsigned Reg = NewMIImplDefs[i]; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) + LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } CopyMI->eraseFromParent(); - ReMatCopies.insert(CopyMI); - ReMatDefs.insert(DefMI); + ErasedInstrs.insert(CopyMI); DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; // The source interval can become smaller because we removed a use. - if (preserveSrcInt) - LIS->shrinkToUses(&SrcInt); + LIS->shrinkToUses(&SrcInt, &DeadDefs); + if (!DeadDefs.empty()) + eliminateDeadDefs(); return true; } @@ -902,51 +835,40 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, return true; } -/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and +/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. 
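With per-regunit liveness, the rematerialization path above records implicit defs of the new instruction as dead defs on any cached regunit ranges instead of building physreg intervals. The pattern as a sketch (the helper name is invented; the calls are those shown in the hunk):

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch (not from the patch): after rematerializing at NewMIIdx, every
// implicit def of Reg must appear as a dead def on the regunit ranges
// that LiveIntervals has already computed.
static void addImplicitDeadDefs(LiveIntervals *LIS,
                                const TargetRegisterInfo *TRI,
                                SlotIndex NewMIIdx, unsigned Reg) {
  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
    if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
      LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}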
-void -RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { - bool DstIsPhys = CP.isPhys(); - unsigned SrcReg = CP.getSrcReg(); - unsigned DstReg = CP.getDstReg(); - unsigned SubIdx = CP.getSubIdx(); +void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, + unsigned DstReg, + unsigned SubIdx) { + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); // Update LiveDebugVariables. LDV->renameRegister(SrcReg, DstReg, SubIdx); for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { - // A PhysReg copy that won't be coalesced can perhaps be rematerialized - // instead. - if (DstIsPhys) { - if (UseMI->isFullCopy() && - UseMI->getOperand(1).getReg() == SrcReg && - UseMI->getOperand(0).getReg() != SrcReg && - UseMI->getOperand(0).getReg() != DstReg && - !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false, - UseMI->getOperand(0).getReg(), UseMI)) - continue; - } - SmallVector<unsigned,8> Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + // If SrcReg wasn't read, it may still be the case that DstReg is live-in + // because SrcReg is a sub-register. + if (DstInt && !Reads && SubIdx) + Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); - // Make sure we don't create read-modify-write defs accidentally. We - // assume here that a SrcReg def cannot be joined into a live DstReg. If - // RegisterCoalescer starts tracking partially live registers, we will - // need to check the actual LiveInterval to determine if DstReg is live - // here. - if (SubIdx && !Reads) - MO.setIsUndef(); + // Adjust <undef> flags in case of sub-register joins. We don't want to + // turn a full def into a read-modify-write sub-register def and vice + // versa. + if (SubIdx && MO.isDef()) + MO.setIsUndef(!Reads); if (DstIsPhys) MO.substPhysReg(DstReg, *TRI); @@ -954,10 +876,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { MO.substVirtReg(DstReg, SubIdx, *TRI); } - // This instruction is a copy that will be removed. - if (JoinedCopies.count(UseMI)) - continue; - DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) @@ -967,210 +885,107 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { } } -/// removeIntervalIfEmpty - Check if the live interval of a physical register -/// is empty, if so remove it and also remove the empty intervals of its -/// sub-registers. Return true if live interval is removed. -static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, - const TargetRegisterInfo *TRI) { - if (li.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { - if (!LIS->hasInterval(*SR)) - continue; - LiveInterval &sli = LIS->getInterval(*SR); - if (sli.empty()) - LIS->removeInterval(*SR); - } - LIS->removeInterval(li.reg); - return true; - } - return false; -} - -/// RemoveDeadDef - If a def of a live interval is now determined dead, remove -/// the val# it defines. If the live interval becomes empty, remove it as well. 
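updateRegDefsUses, rewritten above, now adjusts <undef> flags in both directions so a full def never silently becomes a read-modify-write sub-register def, or vice versa. The per-operand rewrite as a sketch (free-function form is illustrative; Reads comes from readsWritesVirtualRegister as in the hunk):

#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch (not from the patch): rewrite one operand from SrcReg to
// DstReg, fixing the <undef> flag for sub-register defs.
static void rewriteOperand(MachineOperand &MO, unsigned DstReg,
                           unsigned SubIdx, bool Reads,
                           const TargetRegisterInfo &TRI) {
  if (SubIdx && MO.isDef())
    MO.setIsUndef(!Reads); // Sub-reg def is <undef> iff nothing is read.
  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
    MO.substPhysReg(DstReg, TRI);
  else
    MO.substVirtReg(DstReg, SubIdx, TRI);
}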
-bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, - MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); - LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); - if (DefIdx != MLR->valno->def) - return false; - li.removeValNo(MLR->valno); - return removeIntervalIfEmpty(li, LIS, TRI); -} - -/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. -/// We need to be careful about coalescing a source physical register with a -/// virtual register. Once the coalescing is done, it cannot be broken and these -/// are not spillable! If the destination interval uses are far away, think -/// twice about coalescing them! -bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { - bool Allocatable = LIS->isAllocatable(CP.getDstReg()); - LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); - +/// canJoinPhys - Return true if a copy involving a physreg should be joined. +bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is /// always beneficial. - if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) - return true; - - if (!EnablePhysicalJoin) { - DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); - return false; - } - - // Only coalesce to allocatable physreg, we don't want to risk modifying - // reserved registers. - if (!Allocatable) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (LIS->hasInterval(CP.getDstReg()) && - LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); + if (!RegClassInfo.isReserved(CP.getDstReg())) { + DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial()) { - const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg()); - unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; - unsigned Length = LIS->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold) { - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - return false; - } - } - return true; -} - -/// isWinToJoinCrossClass - Return true if it's profitable to coalesce -/// two virtual registers from different register classes. -bool -RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC) { - unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); - // This heuristics is good enough in practice, but it's obviously not *right*. - // 4 is a magic number that works well enough for x86, ARM, etc. It filter - // out all but the most restrictive register classes. - if (NewRCCount > 4 || - // Early exit if the function is fairly small, coalesce aggressively if - // that's the case. For really special register classes with 3 or - // fewer registers, be a bit more careful. 
- (LIS->getFuncInstructionCount() / NewRCCount) < 8) - return true; - LiveInterval &SrcInt = LIS->getInterval(SrcReg); - LiveInterval &DstInt = LIS->getInterval(DstReg); - unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt); - unsigned DstSize = LIS->getApproximateInstructionCount(DstInt); - - // Coalesce aggressively if the intervals are small compared to the number of - // registers in the new class. The number 4 is fairly arbitrary, chosen to be - // less aggressive than the 8 used for the whole function size. - const unsigned ThresSize = 4 * NewRCCount; - if (SrcSize <= ThresSize && DstSize <= ThresSize) + LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); + if (CP.isFlipped() && JoinVInt.containsOneValue()) return true; - // Estimate *register use density*. If it doubles or more, abort. - unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg), - MRI->use_nodbg_end()); - unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg), - MRI->use_nodbg_end()); - unsigned NewUses = SrcUses + DstUses; - unsigned NewSize = SrcSize + DstSize; - if (SrcRC != NewRC && SrcSize > ThresSize) { - unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); - if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) - return false; - } - if (DstRC != NewRC && DstSize > ThresSize) { - unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); - if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) - return false; - } - return true; + DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + return false; } - -/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently /// possible to coalesce this interval, but it may be possible if other /// things get coalesced, then it returns true by reference in 'Again'. -bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { +bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; - if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) - return false; // Already done. - DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - CoalescerPair CP(*TII, *TRI); + CoalescerPair CP(*TRI); if (!CP.setRegisters(CopyMI)) { DEBUG(dbgs() << "\tNot coalescable.\n"); return false; } - // If they are already joined we continue. - if (CP.getSrcReg() == CP.getDstReg()) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tCopy already coalesced.\n"); - return false; // Not coalescable. + // Dead code elimination. This really should be handled by MachineDCE, but + // sometimes dead copies slip through, and we can't generate invalid live + // ranges. + if (!CP.isPhys() && CopyMI->allDefsAreDead()) { + DEBUG(dbgs() << "\tCopy is dead.\n"); + DeadDefs.push_back(CopyMI); + eliminateDeadDefs(); + return true; } // Eliminate undefs. if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { - markAsJoined(CopyMI); DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); return false; // Not coalescable. 
} - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) - << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx()) - << "\n"); + // Coalesced copies are normally removed immediately, but transformations + // like removeCopyByCommutingDef() can inadvertently create identity copies. + // When that happens, just join the values and remove the copy. + if (CP.getSrcReg() == CP.getDstReg()) { + LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); + LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); + if (VNInfo *DefVNI = LRQ.valueDefined()) { + VNInfo *ReadVNI = LRQ.valueIn(); + assert(ReadVNI && "No value before copy and no <undef> flag."); + assert(ReadVNI != DefVNI && "Cannot read and define the same value."); + LI.MergeValueNumberInto(DefVNI, ReadVNI); + DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); + } + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + return true; + } // Enforce policies. if (CP.isPhys()) { - if (!shouldJoinPhys(CP)) { + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) + << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) + << '\n'); + if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, + reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), CP.getDstReg(), CopyMI)) return true; return false; } } else { - // Avoid constraining virtual register regclass too much. - if (CP.isCrossClass()) { - DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); - if (DisableCrossClassJoin) { - DEBUG(dbgs() << "\tCross-class joins disabled.\n"); - return false; - } - if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), - MRI->getRegClass(CP.getSrcReg()), - MRI->getRegClass(CP.getDstReg()), - CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } + DEBUG({ + dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() + << " with "; + if (CP.getDstIdx() && CP.getSrcIdx()) + dbgs() << PrintReg(CP.getDstReg()) << " in " + << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " + << PrintReg(CP.getSrcReg()) << " in " + << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; + else + dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " + << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; + }); // When possible, let DstReg be the larger interval. - if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() > + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > LIS->getInterval(CP.getDstReg()).ranges.size()) CP.flip(); } @@ -1179,21 +994,22 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have // been modified, so we can use this information below to update aliases. - if (!JoinIntervals(CP)) { + if (!joinIntervals(CP)) { // Coalescing failed. // If definition of source is defined by trivial computation, try // rematerializing it. 
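Identity copies created by earlier transformations such as removeCopyByCommutingDef are now folded on the spot, as the joinCopy hunk above shows: the value the copy defines is merged into the value flowing in, then the instruction is deleted. A sketch of that path, using the calls visible in the hunk (helper name invented):

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Sketch (not from the patch): fold an identity copy inside LI.
static void foldIdentityCopy(LiveIntervals *LIS, MachineInstr *CopyMI,
                             LiveInterval &LI) {
  LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
  if (VNInfo *DefVNI = LRQ.valueDefined()) {
    VNInfo *ReadVNI = LRQ.valueIn();
    assert(ReadVNI && "No value before copy and no <undef> flag.");
    LI.MergeValueNumberInto(DefVNI, ReadVNI);
  }
  LIS->RemoveMachineInstrFromMaps(CopyMI);
  CopyMI->eraseFromParent();
}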
if (!CP.isFlipped() && - ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, + reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), CP.getDstReg(), CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. - if (!CP.isPartial()) { - if (AdjustCopiesBackFrom(CP, CopyMI) || - RemoveCopyByCommutingDef(CP, CopyMI)) { - markAsJoined(CopyMI); + if (!CP.isPartial() && !CP.isPhys()) { + if (adjustCopiesBackFrom(CP, CopyMI) || + removeCopyByCommutingDef(CP, CopyMI)) { + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1212,29 +1028,21 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); } - // Remember to delete the copy instruction. - markAsJoined(CopyMI); + // Removing sub-register copies can ease the register class constraints. + // Make sure we attempt to inflate the register class of DstReg. + if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) + InflateRegs.push_back(CP.getDstReg()); - UpdateRegDefsUses(CP); + // CopyMI has been erased by joinIntervals at this point. Remove it from + // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back + // to the work list. This keeps ErasedInstrs from growing needlessly. + ErasedInstrs.erase(CopyMI); - // If we have extended the live range of a physical register, make sure we - // update live-in lists as well. - if (CP.isPhys()) { - SmallVector<MachineBasicBlock*, 16> BlockSeq; - // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the - // ranges for this, and they are preserved. - LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg()); - for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); - I != E; ++I ) { - LIS->findLiveInMBBs(I->start, I->end, BlockSeq); - for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { - MachineBasicBlock &block = *BlockSeq[idx]; - if (!block.isLiveIn(CP.getDstReg())) - block.addLiveIn(CP.getDstReg()); - } - BlockSeq.clear(); - } - } + // Rewrite all SrcReg operands to DstReg. + // Also update DstReg operands to include DstIdx if it is set. + if (CP.getDstIdx()) + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // SrcReg is guaranteed to be the register whose live interval that is // being merged. @@ -1244,16 +1052,56 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - LiveInterval &DstInt = LIS->getInterval(CP.getDstReg()); - dbgs() << "\tJoined. Result = "; - DstInt.print(dbgs(), TRI); - dbgs() << "\n"; + dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); + if (!CP.isPhys()) + dbgs() << LIS->getInterval(CP.getDstReg()); + dbgs() << '\n'; }); ++numJoins; return true; } +/// Attempt joining with a reserved physreg. +bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { + assert(CP.isPhys() && "Must be a physreg copy"); + assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register"); + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << '\n'); + + assert(CP.isFlipped() && RHS.containsOneValue() && + "Invalid join with reserved register"); + + // Optimization for reserved registers like ESP. 
We can only merge with a + // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). + // The live range of the reserved register will look like a set of dead defs + // - we don't properly track the live range of reserved registers. + + // Deny any overlapping intervals. This depends on all the reserved + // register live ranges to look like dead defs. + for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) + if (RHS.overlaps(LIS->getRegUnit(*UI))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); + return false; + } + + // Skip any value computations, we are not adding new values to the + // reserved register. Also skip merging the live ranges, the reserved + // register live range doesn't need to be accurate as long as all the + // defs are there. + + // Delete the identity copy. + MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + + // We don't track kills for reserved registers. + MRI->clearKillFlags(CP.getSrcReg()); + + return true; +} + /// ComputeUltimateVN - Assuming we are going to join two live intervals, /// compute what the resultant value numbers for each value in the input two /// ranges will be. This is complicated by copies between the two which can @@ -1320,144 +1168,70 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, const TargetRegisterInfo &tri, CoalescerPair &CP, VNInfo *VNI, - LiveRange *LR, + VNInfo *OtherVNI, SmallVector<MachineInstr*, 8> &DupCopies) { // FIXME: This is very conservative. For example, we don't handle // physical registers. MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + if (!MI || CP.isPartial() || CP.isPhys()) return false; - unsigned Dst = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - - if (!TargetRegisterInfo::isVirtualRegister(Src) || - !TargetRegisterInfo::isVirtualRegister(Dst)) + unsigned A = CP.getDstReg(); + if (!TargetRegisterInfo::isVirtualRegister(A)) return false; - unsigned A = CP.getDstReg(); unsigned B = CP.getSrcReg(); - - if (B == Dst) - std::swap(A, B); - assert(Dst == A); - - VNInfo *Other = LR->valno; - const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); - - if (!OtherMI || !OtherMI->isFullCopy()) + if (!TargetRegisterInfo::isVirtualRegister(B)) return false; - unsigned OtherDst = OtherMI->getOperand(0).getReg(); - unsigned OtherSrc = OtherMI->getOperand(1).getReg(); - - if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || - !TargetRegisterInfo::isVirtualRegister(OtherDst)) + MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); + if (!OtherMI) return false; - assert(OtherDst == B); - - if (Src != OtherSrc) - return false; + if (MI->isImplicitDef()) { + DupCopies.push_back(MI); + return true; + } else { + if (!MI->isFullCopy()) + return false; + unsigned Src = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Src)) + return false; + if (!OtherMI->isFullCopy()) + return false; + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc)) + return false; - // If the copies use two different value numbers of X, we cannot merge - // A and B. - LiveInterval &SrcInt = li.getInterval(Src); - // getVNInfoBefore returns NULL for undef copies. In this case, the - // optimization is still safe. 
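joinReservedPhysReg, shown above, reduces the physreg case to a pure regunit-overlap test, since reserved-register ranges are only kept as dead defs. The test in isolation, as a sketch (helper name invented; calls are those in the hunk):

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch (not from the patch): a reserved-physreg join is only legal
// when the vreg interval avoids every register unit of the physreg.
static bool reservedJoinIsSafe(LiveIntervals *LIS,
                               const TargetRegisterInfo *TRI,
                               unsigned PhysReg, const LiveInterval &RHS) {
  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
    if (RHS.overlaps(LIS->getRegUnit(*UI)))
      return false; // Interference on a regunit: deny the join.
  return true;
}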
- if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def)) - return false; + if (Src != OtherSrc) + return false; - DupCopies.push_back(MI); + // If the copies use two different value numbers of X, we cannot merge + // A and B. + LiveInterval &SrcInt = li.getInterval(Src); + // getVNInfoBefore returns NULL for undef copies. In this case, the + // optimization is still safe. + if (SrcInt.getVNInfoBefore(OtherVNI->def) != + SrcInt.getVNInfoBefore(VNI->def)) + return false; - return true; + DupCopies.push_back(MI); + return true; + } } -/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// joinIntervals - Attempt to join these two intervals. On failure, this /// returns false. -bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { - LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); - DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; }); - - // If a live interval is a physical register, check for interference with any - // aliases. The interference check implemented here is a bit more conservative - // than the full interfeence check below. We allow overlapping live ranges - // only when one is a copy of the other. - if (CP.isPhys()) { - // Optimization for reserved registers like ESP. - // We can only merge with a reserved physreg if RHS has a single value that - // is a copy of CP.DstReg(). The live range of the reserved register will - // look like a set of dead defs - we don't properly track the live range of - // reserved registers. - if (RegClassInfo.isReserved(CP.getDstReg())) { - assert(CP.isFlipped() && RHS.containsOneValue() && - "Invalid join with reserved register"); - // Deny any overlapping intervals. This depends on all the reserved - // register live ranges to look like dead defs. - for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { - if (!LIS->hasInterval(*AS)) { - // Make sure at least DstReg itself exists before attempting a join. - if (*AS == CP.getDstReg()) - LIS->getOrCreateInterval(CP.getDstReg()); - continue; - } - if (RHS.overlaps(LIS->getInterval(*AS))) { - DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); - return false; - } - } - // Skip any value computations, we are not adding new values to the - // reserved register. Also skip merging the live ranges, the reserved - // register live range doesn't need to be accurate as long as all the - // defs are there. - return true; - } - - // Check if a register mask clobbers DstReg. - BitVector UsableRegs; - if (LIS->checkRegMaskInterference(RHS, UsableRegs) && - !UsableRegs.test(CP.getDstReg())) { - DEBUG(dbgs() << "\t\tRegister mask interference.\n"); - return false; - } +bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { + // Handle physreg joins separately. + if (CP.isPhys()) + return joinReservedPhysReg(CP); - for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ - if (!LIS->hasInterval(*AS)) - continue; - const LiveInterval &LHS = LIS->getInterval(*AS); - LiveInterval::const_iterator LI = LHS.begin(); - for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); - RI != RE; ++RI) { - LI = std::lower_bound(LI, LHS.end(), RI->start); - // Does LHS have an overlapping live range starting before RI? 
- if ((LI != LHS.begin() && LI[-1].end > RI->start) && - (RI->start != RI->valno->def || - !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), TRI); - dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; - }); - return false; - } - - // Check that LHS ranges beginning in this range are copies. - for (; LI != LHS.end() && LI->start < RI->end; ++LI) { - if (LI->start != LI->valno->def || - !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), TRI); - dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; - }); - return false; - } - } - } - } - } + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << '\n'); // Compute the final value assignment, assuming that the live ranges can be // coalesced. @@ -1468,9 +1242,11 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; SmallVector<MachineInstr*, 8> DupCopies; + SmallVector<MachineInstr*, 8> DeadCopies; LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); - DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; }); + DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS + << '\n'); // Loop over the value numbers of the LHS, seeing if any are defined from // the RHS. @@ -1481,21 +1257,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { continue; MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); assert(MI && "Missing def"); - if (!MI->isCopyLike()) // Src not defined by a copy? + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? continue; // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def); // The copy could be to an aliased physreg. - if (!lr) continue; + if (!OtherVNI) + continue; // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. - if (!CP.isCoalescable(MI) && - !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) + if (CP.isCoalescable(MI)) + DeadCopies.push_back(MI); + else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, + DupCopies)) continue; - LHSValsDefinedFromRHS[VNI] = lr->valno; + LHSValsDefinedFromRHS[VNI] = OtherVNI; } // Loop over the value numbers of the RHS, seeing if any are defined from @@ -1507,21 +1286,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { continue; MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); assert(MI && "Missing def"); - if (!MI->isCopyLike()) // Src not defined by a copy? + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? continue; // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def); // The copy could be to an aliased physreg. - if (!lr) continue; + if (!OtherVNI) + continue; // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. 
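// (Editorial sketch.) Both value-mapping loops above ask the same question:
// which value of the other interval does this copy read? The rewrite trades
// the old getLiveRangeContaining(VNI->def.getPrevSlot()) lookup for a direct
// query, which in isolation looks like:
static VNInfo *valueReadByCopy(LiveInterval &Other, const VNInfo *CopyVNI) {
  // The value live in Other immediately before the copy's def slot; null for
  // undef copies and for copies whose source is an aliased physreg.
  return Other.getVNInfoBefore(CopyVNI->def);
}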
- if (!CP.isCoalescable(MI) && - !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) + if (CP.isCoalescable(MI)) + DeadCopies.push_back(MI); + else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, + DupCopies)) continue; - RHSValsDefinedFromLHS[VNI] = lr->valno; + RHSValsDefinedFromLHS[VNI] = OtherVNI; } LHSValNoAssignments.resize(LHS.getNumValNums(), -1); @@ -1563,6 +1345,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { LiveInterval::const_iterator J = RHS.begin(); LiveInterval::const_iterator JE = RHS.end(); + // Collect interval end points that will no longer be kills. + SmallVector<MachineInstr*, 8> LHSOldKills; + SmallVector<MachineInstr*, 8> RHSOldKills; + // Skip ahead until the first place of potential sharing. if (I != IE && J != JE) { if (I->start < J->start) { @@ -1576,20 +1362,21 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { while (I != IE && J != JE) { // Determine if these two live ranges overlap. - bool Overlaps; - if (I->start < J->start) { - Overlaps = I->end > J->start; - } else { - Overlaps = J->end > I->start; - } - // If so, check value # info to determine if they are really different. - if (Overlaps) { + if (I->end > J->start && J->end > I->start) { // If the live range overlap will map to the same value number in the // result liverange, we can still coalesce them. If not, we can't. if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; + + // Extended live ranges should no longer be killed. + if (!I->end.isBlock() && I->end < J->end) + if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end)) + LHSOldKills.push_back(MI); + if (!J->end.isBlock() && J->end < I->end) + if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end)) + RHSOldKills.push_back(MI); } if (I->end < J->end) @@ -1598,47 +1385,48 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { ++J; } - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } + // Clear kill flags where live ranges are extended. + while (!LHSOldKills.empty()) + LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); + while (!RHSOldKills.empty()) + RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI); if (LHSValNoAssignments.empty()) LHSValNoAssignments.push_back(-1); if (RHSValNoAssignments.empty()) RHSValNoAssignments.push_back(-1); + // Now erase all the redundant copies. 
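// (Editorial note on the deletion loop that follows.) Erasing a coalesced
// copy is a two-step idiom: take the instruction out of the slot-index maps
// first, then unlink it from its block. ErasedInstrs, a set of MachineInstr
// pointers, guards against erasing the same instruction twice when it is
// reachable from more than one list:
//   if (ErasedInstrs.insert(MI)) {         // first time we see this copy
//     LIS->RemoveMachineInstrFromMaps(MI); // keep SlotIndexes consistent
//     MI->eraseFromParent();               // now safe to delete
//   }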
+ for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) { + MachineInstr *MI = DeadCopies[i]; + if (!ErasedInstrs.insert(MI)) + continue; + DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI) + << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + SmallVector<unsigned, 8> SourceRegisters; for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), E = DupCopies.end(); I != E; ++I) { MachineInstr *MI = *I; + if (!ErasedInstrs.insert(MI)) + continue; - // We have pretended that the assignment to B in + // If MI is a copy, then we have pretended that the assignment to B in // A = X // B = X // was actually a copy from A. Now that we decided to coalesce A and B, // transform the code into // A = X - // X = X - // and mark the X as coalesced to keep the illusion. - unsigned Src = MI->getOperand(1).getReg(); - SourceRegisters.push_back(Src); - MI->getOperand(0).substVirtReg(Src, 0, *TRI); - - markAsJoined(MI); + // In the case of the implicit_def, we just have to remove it. + if (!MI->isImplicitDef()) { + unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); + } + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); } // If B = X was the last use of X in a liverange, we have to shrink it now @@ -1678,73 +1466,58 @@ namespace { }; } -void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector<MachineInstr*> &TryAgain) { - DEBUG(dbgs() << MBB->getName() << ":\n"); - - SmallVector<MachineInstr*, 8> VirtCopies; - SmallVector<MachineInstr*, 8> PhysCopies; - SmallVector<MachineInstr*, 8> ImpDefCopies; - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E;) { - MachineInstr *Inst = MII++; - - // If this isn't a copy nor a extract_subreg, we can't join intervals. - unsigned SrcReg, DstReg; - if (Inst->isCopy()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->isSubregToReg()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(2).getReg(); - } else +// Try joining WorkList copies starting from index From. +// Null out any successful joins. +bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) { + assert(From <= WorkList.size() && "Out of range"); + bool Progress = false; + for (unsigned i = From, e = WorkList.size(); i != e; ++i) { + if (!WorkList[i]) continue; - - bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty()) - ImpDefCopies.push_back(Inst); - else if (SrcIsPhys || DstIsPhys) - PhysCopies.push_back(Inst); - else - VirtCopies.push_back(Inst); - } - - // Try coalescing implicit copies and insert_subreg <undef> first, - // followed by copies to / from physical registers, then finally copies - // from virtual registers to virtual registers. 
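// (Editorial worked example for the DupCopies loop above.) Given
//   A = X
//   B = X
// where RegistersDefinedFromSameValue proved both definitions read the same
// value of X, B's copy is redundant once A and B are joined. The old code
// rewrote it into an identity copy and marked it joined; the rewrite deletes
// it outright, recording X in SourceRegisters because removing a last use of
// X may leave its live range too long, requiring it to be shrunk afterwards.
// An IMPLICIT_DEF in B's position has no source operand, so it is deleted
// without recording anything.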
- for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { - MachineInstr *TheCopy = ImpDefCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { - MachineInstr *TheCopy = PhysCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { - MachineInstr *TheCopy = VirtCopies[i]; + // Skip instruction pointers that have already been erased, for example by + // dead code elimination. + if (ErasedInstrs.erase(WorkList[i])) { + WorkList[i] = 0; + continue; + } bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); + bool Success = joinCopy(WorkList[i], Again); + Progress |= Success; + if (Success || !Again) + WorkList[i] = 0; } + return Progress; } -void RegisterCoalescer::joinIntervals() { +void +RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { + DEBUG(dbgs() << MBB->getName() << ":\n"); + + // Collect all copy-like instructions in MBB. Don't start coalescing anything + // yet, it might invalidate the iterator. + const unsigned PrevSize = WorkList.size(); + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) + if (MII->isCopyLike()) + WorkList.push_back(MII); + + // Try coalescing the collected copies immediately, and remove the nulls. + // This prevents the WorkList from getting too large since most copies are + // joinable on the first attempt. + if (copyCoalesceWorkList(PrevSize)) + WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), + (MachineInstr*)0), WorkList.end()); +} + +void RegisterCoalescer::joinAllIntervals() { DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + assert(WorkList.empty() && "Old data still around."); - std::vector<MachineInstr*> TryAgainList; if (Loops->empty()) { // If there are no loops in the function, join intervals in function order. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - CopyCoalesceInMBB(I, TryAgainList); + copyCoalesceInMBB(I); } else { // Otherwise, join intervals in inner loops before other intervals. // Unfortunately we can't just iterate over loop hierarchy here because @@ -1763,34 +1536,20 @@ void RegisterCoalescer::joinIntervals() { // Finally, join intervals in loop nest order. for (unsigned i = 0, e = MBBs.size(); i != e; ++i) - CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + copyCoalesceInMBB(MBBs[i].second); } // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - MachineInstr *&TheCopy = TryAgainList[i]; - if (!TheCopy) - continue; - - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy= 0; // Mark this one as done. 
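// (Editorial sketch of the new driver's contract.) joinCopy(MI, Again)
// returning false with Again == true means the copy may become joinable
// after other joins land, so its worklist slot survives; every other outcome
// nulls the slot. The top-level fixed point is then simply:
//   while (copyCoalesceWorkList())  // true while at least one join landed
//     /* empty */ ;                 // rescan survivors until no progress
// Termination follows because each productive pass strictly shrinks the set
// of non-null worklist entries.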
- ProgressMade = true; - } - } - } + while (copyCoalesceWorkList()) + /* empty */ ; } void RegisterCoalescer::releaseMemory() { - JoinedCopies.clear(); - ReMatCopies.clear(); - ReMatDefs.clear(); + ErasedInstrs.clear(); + WorkList.clear(); + DeadDefs.clear(); + InflateRegs.clear(); } bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { @@ -1814,138 +1573,11 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { RegClassInfo.runOnMachineFunction(fn); // Join (coalesce) intervals if requested. - if (EnableJoining) { - joinIntervals(); - DEBUG({ - dbgs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); - I != E; ++I){ - I->second->print(dbgs(), TRI); - dbgs() << "\n"; - } - }); - } - - // Perform a final pass over the instructions and compute spill weights - // and remove identity moves. - SmallVector<unsigned, 4> DeadDefs, InflateRegs; - for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* mbb = mbbi; - for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); - mii != mie; ) { - MachineInstr *MI = mii; - if (JoinedCopies.count(MI)) { - // Delete all coalesced copies. - bool DoDelete = true; - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - unsigned DstReg = MI->getOperand(0).getReg(); - - // Collect candidates for register class inflation. - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg))) - InflateRegs.push_back(SrcReg); - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg))) - InflateRegs.push_back(DstReg); - - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - MI->getNumOperands() > 2) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - - if (MI->allDefsAreDead()) { - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - LIS->hasInterval(SrcReg)) - LIS->shrinkToUses(&LIS->getInterval(SrcReg)); - DoDelete = true; - } - if (!DoDelete) { - // We need the instruction to adjust liveness, so make it a KILL. - if (MI->isSubregToReg()) { - MI->RemoveOperand(3); - MI->RemoveOperand(1); - } - MI->setDesc(TII->get(TargetOpcode::KILL)); - mii = llvm::next(mii); - } else { - LIS->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - } - continue; - } - - // Now check if this is a remat'ed def instruction which is now dead. - if (ReMatDefs.count(MI)) { - bool isDead = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - DeadDefs.push_back(Reg); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // Remat may also enable register class inflation. 
- if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) - InflateRegs.push_back(Reg); - } - if (MO.isDead()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->use_nodbg_empty(Reg)) { - isDead = false; - break; - } - } - if (isDead) { - while (!DeadDefs.empty()) { - unsigned DeadDef = DeadDefs.back(); - DeadDefs.pop_back(); - RemoveDeadDef(LIS->getInterval(DeadDef), MI); - } - LIS->RemoveMachineInstrFromMaps(mii); - mii = mbbi->erase(mii); - continue; - } else - DeadDefs.clear(); - } - - ++mii; - - // Check for now unnecessary kill flags. - if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - unsigned reg = MO.getReg(); - if (!reg || !LIS->hasInterval(reg)) continue; - if (!LIS->getInterval(reg).killedAt(DefIdx)) { - MO.setIsKill(false); - continue; - } - // When leaving a kill flag on a physreg, check if any subregs should - // remain alive. - if (!TargetRegisterInfo::isPhysicalRegister(reg)) - continue; - for (const uint16_t *SR = TRI->getSubRegisters(reg); - unsigned S = *SR; ++SR) - if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) - MI->addRegisterDefined(S, TRI); - } - } - } + if (EnableJoining) + joinAllIntervals(); // After deleting a lot of copies, register classes may be less constrained. - // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 -> + // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> // DPR inflation. array_pod_sort(InflateRegs.begin(), InflateRegs.end()); InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 310b933..8a6df98 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -26,7 +26,6 @@ namespace llvm { /// two registers can be coalesced, CoalescerPair can determine if a copy /// instruction would become an identity copy after coalescing. class CoalescerPair { - const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; /// DstReg - The register that will be left after coalescing. It can be a @@ -36,10 +35,13 @@ namespace llvm { /// SrcReg - the virtual register that will be coalesced into dstReg. unsigned SrcReg; - /// subReg_ - The subregister index of srcReg in DstReg. It is possible the - /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a - /// virtual register. - unsigned SubIdx; + /// DstIdx - The sub-register index of the old DstReg in the new coalesced + /// register. + unsigned DstIdx; + + /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced + /// register. + unsigned SrcIdx; /// Partial - True when the original copy was a partial subregister copy. bool Partial; @@ -52,12 +54,13 @@ namespace llvm { bool Flipped; /// NewRC - The register class of the coalesced register, or NULL if DstReg - /// is a physreg. + /// is a physreg. This register class may be a super-register of both + /// SrcReg and DstReg. 
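// (Editorial sketch.) With the old single SubIdx split into DstIdx/SrcIdx, a
// join places the old DstReg value at sub-register DstIdx and the old SrcReg
// value at sub-register SrcIdx of the coalesced register; a plain full-width
// copy has DstIdx == SrcIdx == 0. A hypothetical use-rewrite step for the
// virtual-register case would compose the index the same way this patch does
// elsewhere with MachineOperand::substVirtReg:
//   MO.substVirtReg(CP.getDstReg(), CP.getSrcIdx(), TRI);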
const TargetRegisterClass *NewRC; public: - CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri) - : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0), + CoalescerPair(const TargetRegisterInfo &tri) + : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} /// setRegisters - set registers to match the copy instruction MI. Return @@ -94,9 +97,13 @@ namespace llvm { /// getSrcReg - Return the virtual register that will be coalesced away. unsigned getSrcReg() const { return SrcReg; } - /// getSubIdx - Return the subregister index in DstReg that SrcReg will be - /// coalesced into, or 0. - unsigned getSubIdx() const { return SubIdx; } + /// getDstIdx - Return the subregister index that DstReg will be coalesced + /// into, or 0. + unsigned getDstIdx() const { return DstIdx; } + + /// getSrcIdx - Return the subregister index that SrcReg will be coalesced + /// into, or 0. + unsigned getSrcIdx() const { return SrcIdx; } /// getNewRC - Return the register class of the coalesced register. const TargetRegisterClass *getNewRC() const { return NewRC; } diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp new file mode 100644 index 0000000..43448c8 --- /dev/null +++ b/lib/CodeGen/RegisterPressure.cpp @@ -0,0 +1,841 @@ +//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterPressure class which can be used to track +// MachineInstr level register pressure. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// Increase register pressure for each set impacted by this register class. +static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, + std::vector<unsigned> &MaxSetPressure, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; + for (const int *PSet = TRI->getRegClassPressureSets(RC); + *PSet != -1; ++PSet) { + CurrSetPressure[*PSet] += Weight; + if (&CurrSetPressure != &MaxSetPressure + && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) { + MaxSetPressure[*PSet] = CurrSetPressure[*PSet]; + } + } +} + +/// Decrease register pressure for each set impacted by this register class. +static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; + for (const int *PSet = TRI->getRegClassPressureSets(RC); + *PSet != -1; ++PSet) { + assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSet] -= Weight; + } +} + +/// Directly increase pressure only within this RegisterPressure result. 
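// (Editorial worked example for the two set-pressure helpers above; numbers
// invented.) If a register class has RegWeight 2 and lists pressure sets
// {0, 3}, terminated by -1, then making one such register live performs
//   CurrSetPressure[0] += 2;  CurrSetPressure[3] += 2;
// and each affected MaxSetPressure entry is raised whenever the new current
// value exceeds it. decreaseSetPressure asserts before subtracting, so set
// pressure can never underflow below zero.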
+void RegisterPressure::increase(const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI); +} + +/// Directly decrease pressure only within this RegisterPressure result. +void RegisterPressure::decrease(const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + decreaseSetPressure(MaxSetPressure, RC, TRI); +} + +void RegisterPressure::dump(const TargetRegisterInfo *TRI) { + dbgs() << "Live In: "; + for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) + dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; + dbgs() << '\n'; + dbgs() << "Live Out: "; + for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i) + dbgs() << PrintReg(LiveOutRegs[i], TRI) << " "; + dbgs() << '\n'; + for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) { + if (MaxSetPressure[i] != 0) + dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i] + << '\n'; + } +} + +/// Increase the current pressure as impacted by these physical registers and +/// bump the high water mark if needed. +void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) + increaseSetPressure(CurrSetPressure, P.MaxSetPressure, + TRI->getMinimalPhysRegClass(Regs[I]), TRI); +} + +/// Simply decrease the current pressure as impacted by these physcial +/// registers. +void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) + decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]), + TRI); +} + +/// Increase the current pressure as impacted by these virtual registers and +/// bump the high water mark if needed. +void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) + increaseSetPressure(CurrSetPressure, P.MaxSetPressure, + MRI->getRegClass(Regs[I]), TRI); +} + +/// Simply decrease the current pressure as impacted by these virtual registers. +void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) + decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI); +} + +/// Clear the result so it can be used for another round of pressure tracking. +void IntervalPressure::reset() { + TopIdx = BottomIdx = SlotIndex(); + MaxSetPressure.clear(); + LiveInRegs.clear(); + LiveOutRegs.clear(); +} + +/// Clear the result so it can be used for another round of pressure tracking. +void RegionPressure::reset() { + TopPos = BottomPos = MachineBasicBlock::const_iterator(); + MaxSetPressure.clear(); + LiveInRegs.clear(); + LiveOutRegs.clear(); +} + +/// If the current top is not less than or equal to the next index, open it. +/// We happen to need the SlotIndex for the next top for pressure update. +void IntervalPressure::openTop(SlotIndex NextTop) { + if (TopIdx <= NextTop) + return; + TopIdx = SlotIndex(); + LiveInRegs.clear(); +} + +/// If the current top is the previous instruction (before receding), open it. +void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) { + if (TopPos != PrevTop) + return; + TopPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +/// If the current bottom is not greater than the previous index, open it. 
+void IntervalPressure::openBottom(SlotIndex PrevBottom) { + if (BottomIdx > PrevBottom) + return; + BottomIdx = SlotIndex(); + LiveInRegs.clear(); +} + +/// If the current bottom is the previous instr (before advancing), open it. +void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { + if (BottomPos != PrevBottom) + return; + BottomPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +/// Setup the RegPressureTracker. +/// +/// TODO: Add support for pressure without LiveIntervals. +void RegPressureTracker::init(const MachineFunction *mf, + const RegisterClassInfo *rci, + const LiveIntervals *lis, + const MachineBasicBlock *mbb, + MachineBasicBlock::const_iterator pos) +{ + MF = mf; + TRI = MF->getTarget().getRegisterInfo(); + RCI = rci; + MRI = &MF->getRegInfo(); + MBB = mbb; + + if (RequireIntervals) { + assert(lis && "IntervalPressure requires LiveIntervals"); + LIS = lis; + } + + CurrPos = pos; + while (CurrPos != MBB->end() && CurrPos->isDebugValue()) + ++CurrPos; + + CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); + + if (RequireIntervals) + static_cast<IntervalPressure&>(P).reset(); + else + static_cast<RegionPressure&>(P).reset(); + P.MaxSetPressure = CurrSetPressure; + + LivePhysRegs.clear(); + LivePhysRegs.setUniverse(TRI->getNumRegs()); + LiveVirtRegs.clear(); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); +} + +/// Does this pressure result have a valid top position and live ins. +bool RegPressureTracker::isTopClosed() const { + if (RequireIntervals) + return static_cast<IntervalPressure&>(P).TopIdx.isValid(); + return (static_cast<RegionPressure&>(P).TopPos == + MachineBasicBlock::const_iterator()); +} + +/// Does this pressure result have a valid bottom position and live outs. +bool RegPressureTracker::isBottomClosed() const { + if (RequireIntervals) + return static_cast<IntervalPressure&>(P).BottomIdx.isValid(); + return (static_cast<RegionPressure&>(P).BottomPos == + MachineBasicBlock::const_iterator()); +} + +/// Set the boundary for the top of the region and summarize live ins. +void RegPressureTracker::closeTop() { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).TopIdx = + LIS->getInstructionIndex(CurrPos).getRegSlot(); + else + static_cast<RegionPressure&>(P).TopPos = CurrPos; + + assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); + P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size()); + P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end()); + for (SparseSet<unsigned>::const_iterator I = + LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I) + P.LiveInRegs.push_back(*I); + std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); + P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), + P.LiveInRegs.end()); +} + +/// Set the boundary for the bottom of the region and summarize live outs. 
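// (Editorial note for closeTop above and closeBottom below.) Each snapshot
// merges the sparse physreg set with the sparse vreg set into one vector, so
// duplicates are squeezed out with the standard sort+unique idiom:
//   std::sort(V.begin(), V.end());
//   V.erase(std::unique(V.begin(), V.end()), V.end());
// Ordering is irrelevant afterwards; the vectors are only membership-tested.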
+void RegPressureTracker::closeBottom() { + if (RequireIntervals) + if (CurrPos == MBB->end()) + static_cast<IntervalPressure&>(P).BottomIdx = LIS->getMBBEndIdx(MBB); + else + static_cast<IntervalPressure&>(P).BottomIdx = + LIS->getInstructionIndex(CurrPos).getRegSlot(); + else + static_cast<RegionPressure&>(P).BottomPos = CurrPos; + + assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); + P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size()); + P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end()); + for (SparseSet<unsigned>::const_iterator I = + LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I) + P.LiveOutRegs.push_back(*I); + std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); + P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), + P.LiveOutRegs.end()); +} + +/// Finalize the region boundaries and record live ins and live outs. +void RegPressureTracker::closeRegion() { + if (!isTopClosed() && !isBottomClosed()) { + assert(LivePhysRegs.empty() && LiveVirtRegs.empty() && + "no region boundary"); + return; + } + if (!isBottomClosed()) + closeBottom(); + else if (!isTopClosed()) + closeTop(); + // If both top and bottom are closed, do nothing. +} + +/// Return true if Reg aliases a register in Regs SparseSet. +static bool hasRegAlias(unsigned Reg, SparseSet<unsigned> &Regs, + const TargetRegisterInfo *TRI) { + assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs"); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (Regs.count(*AI)) + return true; + return false; +} + +/// Return true if Reg aliases a register in unsorted Regs SmallVector. +/// This is only valid for physical registers. +static SmallVectorImpl<unsigned>::iterator +findRegAlias(unsigned Reg, SmallVectorImpl<unsigned> &Regs, + const TargetRegisterInfo *TRI) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + SmallVectorImpl<unsigned>::iterator I = + std::find(Regs.begin(), Regs.end(), *AI); + if (I != Regs.end()) + return I; + } + return Regs.end(); +} + +/// Return true if Reg can be inserted into Regs SmallVector. For virtual +/// register, do a linear search. For physical registers check for aliases. +static SmallVectorImpl<unsigned>::iterator +findReg(unsigned Reg, bool isVReg, SmallVectorImpl<unsigned> &Regs, + const TargetRegisterInfo *TRI) { + if(isVReg) + return std::find(Regs.begin(), Regs.end(), Reg); + return findRegAlias(Reg, Regs, TRI); +} + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +template<bool isVReg> +struct RegisterOperands { + SmallVector<unsigned, 8> Uses; + SmallVector<unsigned, 8> Defs; + SmallVector<unsigned, 8> DeadDefs; + + /// Push this operand's register onto the correct vector. + void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) { + if (MO.readsReg()) { + if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end()) + Uses.push_back(MO.getReg()); + } + if (MO.isDef()) { + if (MO.isDead()) { + if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end()) + DeadDefs.push_back(MO.getReg()); + } + else { + if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) + Defs.push_back(MO.getReg()); + } + } + } +}; +typedef RegisterOperands<false> PhysRegOperands; +typedef RegisterOperands<true> VirtRegOperands; + +/// Collect physical and virtual register operands. 
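// (Editorial usage sketch; all names are the ones defined above and below.)
// A caller builds one bucket set of each flavor per instruction, then reads
// the three vectors:
//   PhysRegOperands PhysRegOpers;
//   VirtRegOperands VirtRegOpers;
//   collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
//   // Uses / Defs / DeadDefs now each list a register at most once, with
//   // physreg aliases already folded together by findRegAlias().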
+static void collectOperands(const MachineInstr *MI, + PhysRegOperands &PhysRegOpers, + VirtRegOperands &VirtRegOpers, + const TargetRegisterInfo *TRI, + const RegisterClassInfo *RCI) { + for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) { + const MachineOperand &MO = *OperI; + if (!MO.isReg() || !MO.getReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegOpers.collect(MO, TRI); + else if (RCI->isAllocatable(MO.getReg())) + PhysRegOpers.collect(MO, TRI); + } + // Remove redundant physreg dead defs. + for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) { + unsigned Reg = PhysRegOpers.DeadDefs[i-1]; + if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end()) + PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]); + } +} + +/// Force liveness of registers. +void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + if (TargetRegisterInfo::isVirtualRegister(Regs[i])) { + if (LiveVirtRegs.insert(Regs[i]).second) + increaseVirtRegPressure(Regs[i]); + } + else { + if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) { + LivePhysRegs.insert(Regs[i]); + increasePhysRegPressure(Regs[i]); + } + } + } +} + +/// Add PhysReg to the live in set and increase max pressure. +void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) { + assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice"); + if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end()) + return; + + // At live in discovery, unconditionally increase the high water mark. + P.LiveInRegs.push_back(Reg); + P.increase(TRI->getMinimalPhysRegClass(Reg), TRI); +} + +/// Add PhysReg to the live out set and increase max pressure. +void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) { + assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice"); + if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end()) + return; + + // At live out discovery, unconditionally increase the high water mark. + P.LiveOutRegs.push_back(Reg); + P.increase(TRI->getMinimalPhysRegClass(Reg), TRI); +} + +/// Add VirtReg to the live in set and increase max pressure. +void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) { + assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice"); + if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) != + P.LiveInRegs.end()) + return; + + // At live in discovery, unconditionally increase the high water mark. + P.LiveInRegs.push_back(Reg); + P.increase(MRI->getRegClass(Reg), TRI); +} + +/// Add VirtReg to the live out set and increase max pressure. +void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) { + assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice"); + if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) != + P.LiveOutRegs.end()) + return; + + // At live out discovery, unconditionally increase the high water mark. + P.LiveOutRegs.push_back(Reg); + P.increase(MRI->getRegClass(Reg), TRI); +} + +/// Recede across the previous instruction. +bool RegPressureTracker::recede() { + // Check for the top of the analyzable region. + if (CurrPos == MBB->begin()) { + closeRegion(); + return false; + } + if (!isBottomClosed()) + closeBottom(); + + // Open the top of the region using block iterators. + if (!RequireIntervals && isTopClosed()) + static_cast<RegionPressure&>(P).openTop(CurrPos); + + // Find the previous instruction. 
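// (Editorial note on the do/while that follows.) The backward walk must not
// step past MBB->begin(), so the loop stops there even when that instruction
// is a DBG_VALUE; the extra isDebugValue() check afterwards detects that
// case and closes the region instead of analyzing a debug instruction.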
+ do + --CurrPos; + while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + + if (CurrPos->isDebugValue()) { + closeRegion(); + return false; + } + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the top of the region using slot indexes. + if (RequireIntervals && isTopClosed()) + static_cast<IntervalPressure&>(P).openTop(SlotIdx); + + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Boost pressure for all dead defs together. + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + + // Kill liveness at live defs. + // TODO: consider earlyclobbers? + for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Defs[i]; + if (LivePhysRegs.erase(Reg)) + decreasePhysRegPressure(Reg); + else + discoverPhysLiveOut(Reg); + } + for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Defs[i]; + if (LiveVirtRegs.erase(Reg)) + decreaseVirtRegPressure(Reg); + else + discoverVirtLiveOut(Reg); + } + + // Generate liveness for uses. + for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Uses[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) { + increasePhysRegPressure(Reg); + LivePhysRegs.insert(Reg); + } + } + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + if (!LiveVirtRegs.count(Reg)) { + // Adjust liveouts if LiveIntervals are available. + if (RequireIntervals) { + const LiveInterval *LI = &LIS->getInterval(Reg); + if (!LI->killedAt(SlotIdx)) + discoverVirtLiveOut(Reg); + } + increaseVirtRegPressure(Reg); + LiveVirtRegs.insert(Reg); + } + } + return true; +} + +/// Advance across the current instruction. +bool RegPressureTracker::advance() { + // Check for the bottom of the analyzable region. + if (CurrPos == MBB->end()) { + closeRegion(); + return false; + } + if (!isTopClosed()) + closeTop(); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the bottom of the region using slot indexes. + if (isBottomClosed()) { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).openBottom(SlotIdx); + else + static_cast<RegionPressure&>(P).openBottom(CurrPos); + } + + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Kill liveness at last uses. + for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Uses[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) + discoverPhysLiveIn(Reg); + else { + // Allocatable physregs are always single-use before regalloc. + decreasePhysRegPressure(Reg); + LivePhysRegs.erase(Reg); + } + } + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + if (RequireIntervals) { + const LiveInterval *LI = &LIS->getInterval(Reg); + if (LI->killedAt(SlotIdx)) { + if (LiveVirtRegs.erase(Reg)) + decreaseVirtRegPressure(Reg); + else + discoverVirtLiveIn(Reg); + } + } + else if (!LiveVirtRegs.count(Reg)) { + discoverVirtLiveIn(Reg); + increaseVirtRegPressure(Reg); + } + } + + // Generate liveness for defs. 
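// (Editorial note, before the defs loops that follow.) recede() above and
// advance() here bracket dead defs with a matched pair of calls:
//   increasePhysRegPressure(PhysRegOpers.DeadDefs); // momentarily count it
//   decreasePhysRegPressure(PhysRegOpers.DeadDefs); // then take it back out
// The net change to CurrSetPressure is zero, but the increase pushes
// MaxSetPressure up, so a dead def still contributes to the high-water mark
// at its own instruction.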
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Defs[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) { + increasePhysRegPressure(Reg); + LivePhysRegs.insert(Reg); + } + } + for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Defs[i]; + if (LiveVirtRegs.insert(Reg).second) + increaseVirtRegPressure(Reg); + } + + // Boost pressure for all dead defs together. + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + + // Find the next instruction. + do + ++CurrPos; + while (CurrPos != MBB->end() && CurrPos->isDebugValue()); + return true; +} + +/// Find the max change in excess pressure across all sets. +static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, + ArrayRef<unsigned> NewPressureVec, + RegPressureDelta &Delta, + const TargetRegisterInfo *TRI) { + int ExcessUnits = 0; + unsigned PSetID = ~0U; + for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { + unsigned POld = OldPressureVec[i]; + unsigned PNew = NewPressureVec[i]; + int PDiff = (int)PNew - (int)POld; + if (!PDiff) // No change in this set in the common case. + continue; + // Only consider change beyond the limit. + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (Limit > POld) { + if (Limit > PNew) + PDiff = 0; // Under the limit + else + PDiff = PNew - Limit; // Just exceeded limit. + } + else if (Limit > PNew) + PDiff = Limit - POld; // Just obeyed limit. + + if (std::abs(PDiff) > std::abs(ExcessUnits)) { + ExcessUnits = PDiff; + PSetID = i; + } + } + Delta.Excess.PSetID = PSetID; + Delta.Excess.UnitIncrease = ExcessUnits; +} + +/// Find the max change in max pressure that either surpasses a critical PSet +/// limit or exceeds the current MaxPressureLimit. +/// +/// FIXME: comparing each element of the old and new MaxPressure vectors here is +/// silly. It's done now to demonstrate the concept but will go away with a +/// RegPressureTracker API change to work with pressure differences. +static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, + ArrayRef<unsigned> NewMaxPressureVec, + ArrayRef<PressureElement> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit, + RegPressureDelta &Delta) { + Delta.CriticalMax = PressureElement(); + Delta.CurrentMax = PressureElement(); + + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) { + unsigned POld = OldMaxPressureVec[i]; + unsigned PNew = NewMaxPressureVec[i]; + if (PNew == POld) // No change in this set in the common case. + continue; + + while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; + if (PDiff > Delta.CriticalMax.UnitIncrease) { + Delta.CriticalMax.PSetID = i; + Delta.CriticalMax.UnitIncrease = PDiff; + } + } + + // Find the greatest increase above MaxPressureLimit. + // (Ignores negative MDiff). + int MDiff = (int)PNew - (int)MaxPressureLimit[i]; + if (MDiff > Delta.CurrentMax.UnitIncrease) { + Delta.CurrentMax.PSetID = i; + Delta.CurrentMax.UnitIncrease = PNew; + } + } +} + +/// Record the upward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. 
+/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { + // Account for register pressure similar to RegPressureTracker::recede(). + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Boost max pressure for all dead defs together. + // Since CurrSetPressure and MaxSetPressure + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + + // Kill liveness at live defs. + decreasePhysRegPressure(PhysRegOpers.Defs); + decreaseVirtRegPressure(VirtRegOpers.Defs); + + // Generate liveness for uses. + for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Uses[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) + increasePhysRegPressure(Reg); + } + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + if (!LiveVirtRegs.count(Reg)) + increaseVirtRegPressure(Reg); + } +} + +/// Consider the pressure increase caused by traversing this instruction +/// bottom-up. Find the pressure set with the most change beyond its pressure +/// limit based on the tracker's current pressure, and return the change in +/// number of register units of that pressure set introduced by this +/// instruction. +/// +/// This assumes that the current LiveOut set is sufficient. +/// +/// FIXME: This is expensive for an on-the-fly query. We need to cache the +/// result per-SUnit with enough information to adjust for the current +/// scheduling position. But this works as a proof of concept. +void RegPressureTracker:: +getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, + ArrayRef<PressureElement> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) { + // Snapshot Pressure. + // FIXME: The snapshot heap space should persist. But I'm planning to + // summarize the pressure effect so we don't need to snapshot at all. + std::vector<unsigned> SavedPressure = CurrSetPressure; + std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.UnitIncrease >= 0 && + Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). +static bool findUseBetween(unsigned Reg, + SlotIndex PriorUseIdx, SlotIndex NextUseIdx, + const MachineRegisterInfo *MRI, + const LiveIntervals *LIS) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; + } + return false; +} + +/// Record the downward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. 
+/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { + // Account for register pressure similar to RegPressureTracker::recede(). + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Kill liveness at last uses. Assume allocatable physregs are single-use + // rather than checking LiveIntervals. + decreasePhysRegPressure(PhysRegOpers.Uses); + if (RequireIntervals) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + const LiveInterval *LI = &LIS->getInterval(Reg); + // FIXME: allow the caller to pass in the list of vreg uses that remain to + // be bottom-scheduled to avoid searching uses at each query. + SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + if (LI->killedAt(SlotIdx) + && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + decreaseVirtRegPressure(Reg); + } + } + } + + // Generate liveness for defs. + increasePhysRegPressure(PhysRegOpers.Defs); + increaseVirtRegPressure(VirtRegOpers.Defs); + + // Boost pressure for all dead defs together. + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); +} + +/// Consider the pressure increase caused by traversing this instruction +/// top-down. Find the register class with the most change in its pressure limit +/// based on the tracker's current pressure, and return the number of excess +/// register units of that pressure set introduced by this instruction. +/// +/// This assumes that the current LiveIn set is sufficient. +void RegPressureTracker:: +getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, + ArrayRef<PressureElement> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) { + // Snapshot Pressure. + std::vector<unsigned> SavedPressure = CurrSetPressure; + std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.UnitIncrease >= 0 && + Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Get the pressure of each PSet after traversing this instruction bottom-up. +void RegPressureTracker:: +getUpwardPressure(const MachineInstr *MI, + std::vector<unsigned> &PressureResult, + std::vector<unsigned> &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} + +/// Get the pressure of each PSet after traversing this instruction top-down. +void RegPressureTracker:: +getDownwardPressure(const MachineInstr *MI, + std::vector<unsigned> &PressureResult, + std::vector<unsigned> &MaxPressureResult) { + // Snapshot pressure. 
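// (Editorial note on the snapshot/swap pattern used here and in
// getUpwardPressure above.) The result vectors double as the saved state:
// copy the current vectors into the results, bump pressure across MI, then
// swap. The tracker ends up restored and the bumped values end up in the
// out-parameters, with no second temporary allocation.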
+ PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 03bd82e..d673794 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -37,16 +37,13 @@ using namespace llvm; void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - RegsAvailable.reset(SubReg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RegsAvailable.reset(*SubRegs); } bool RegScavenger::isAliasUsed(unsigned Reg) const { - if (isUsed(Reg)) - return true; - for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R) - if (isUsed(*R)) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (isUsed(*AI)) return true; return false; } @@ -114,8 +111,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(Reg); - for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) - BV.set(*R); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + BV.set(*SubRegs); } void RegScavenger::forward() { @@ -195,9 +192,8 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - if (isUsed(SubReg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + if (isUsed(*SubRegs)) { SubUsed = true; break; } @@ -296,9 +292,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, isVirtKillInsn = true; continue; } - Candidates.reset(MO.getReg()); - for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++) - Candidates.reset(*R); + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); } // If we're not in a virtual reg's live range, this is a valid // restore point. diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp deleted file mode 100644 index 6020908..0000000 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ /dev/null @@ -1,1013 +0,0 @@ -//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "rendermf" - -#include "RenderMachineFunction.h" - -#include "VirtRegMap.h" - -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" - -#include <sstream> - -using namespace llvm; - -char RenderMachineFunction::ID = 0; -INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf", - "Render machine functions (and related info) to HTML pages", - false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(RenderMachineFunction, "rendermf", - "Render machine functions (and related info) to HTML pages", - false, false) - -static cl::opt<std::string> -outputFileSuffix("rmf-file-suffix", - cl::desc("Appended to function name to get output file name " - "(default: \".html\")"), - cl::init(".html"), cl::Hidden); - -static cl::opt<std::string> -machineFuncsToRender("rmf-funcs", - cl::desc("Comma separated list of functions to render" - ", or \"*\"."), - cl::init(""), cl::Hidden); - -static cl::opt<std::string> -pressureClasses("rmf-classes", - cl::desc("Register classes to render pressure for."), - cl::init(""), cl::Hidden); - -static cl::opt<std::string> -showIntervals("rmf-intervals", - cl::desc("Live intervals to show alongside code."), - cl::init(""), cl::Hidden); - -static cl::opt<bool> -filterEmpty("rmf-filter-empty-intervals", - cl::desc("Don't display empty intervals."), - cl::init(true), cl::Hidden); - -static cl::opt<bool> -showEmptyIndexes("rmf-empty-indexes", - cl::desc("Render indexes not associated with instructions or " - "MBB starts."), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -useFancyVerticals("rmf-fancy-verts", - cl::desc("Use SVG for vertical text."), - cl::init(true), cl::Hidden); - -static cl::opt<bool> -prettyHTML("rmf-pretty-html", - cl::desc("Pretty print HTML. 
For debugging the renderer only.."), - cl::init(false), cl::Hidden); - - -namespace llvm { - - bool MFRenderingOptions::renderingOptionsProcessed; - std::set<std::string> MFRenderingOptions::mfNamesToRender; - bool MFRenderingOptions::renderAllMFs = false; - - std::set<std::string> MFRenderingOptions::classNamesToRender; - bool MFRenderingOptions::renderAllClasses = false; - - std::set<std::pair<unsigned, unsigned> > - MFRenderingOptions::intervalNumsToRender; - unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly; - - template <typename OutputItr> - void MFRenderingOptions::splitComaSeperatedList(const std::string &s, - OutputItr outItr) { - std::string::const_iterator curPos = s.begin(); - std::string::const_iterator nextComa = std::find(curPos, s.end(), ','); - while (nextComa != s.end()) { - std::string elem; - std::copy(curPos, nextComa, std::back_inserter(elem)); - *outItr = elem; - ++outItr; - curPos = llvm::next(nextComa); - nextComa = std::find(curPos, s.end(), ','); - } - - if (curPos != s.end()) { - std::string elem; - std::copy(curPos, s.end(), std::back_inserter(elem)); - *outItr = elem; - ++outItr; - } - } - - void MFRenderingOptions::processOptions() { - if (!renderingOptionsProcessed) { - processFuncNames(); - processRegClassNames(); - processIntervalNumbers(); - renderingOptionsProcessed = true; - } - } - - void MFRenderingOptions::processFuncNames() { - if (machineFuncsToRender == "*") { - renderAllMFs = true; - } else { - splitComaSeperatedList(machineFuncsToRender, - std::inserter(mfNamesToRender, - mfNamesToRender.begin())); - } - } - - void MFRenderingOptions::processRegClassNames() { - if (pressureClasses == "*") { - renderAllClasses = true; - } else { - splitComaSeperatedList(pressureClasses, - std::inserter(classNamesToRender, - classNamesToRender.begin())); - } - } - - void MFRenderingOptions::processIntervalNumbers() { - std::set<std::string> intervalRanges; - splitComaSeperatedList(showIntervals, - std::inserter(intervalRanges, - intervalRanges.begin())); - std::for_each(intervalRanges.begin(), intervalRanges.end(), - processIntervalRange); - } - - void MFRenderingOptions::processIntervalRange( - const std::string &intervalRangeStr) { - if (intervalRangeStr == "*") { - intervalTypesToRender |= All; - } else if (intervalRangeStr == "virt-nospills*") { - intervalTypesToRender |= VirtNoSpills; - } else if (intervalRangeStr == "spills*") { - intervalTypesToRender |= VirtSpills; - } else if (intervalRangeStr == "virt*") { - intervalTypesToRender |= AllVirt; - } else if (intervalRangeStr == "phys*") { - intervalTypesToRender |= AllPhys; - } else { - std::istringstream iss(intervalRangeStr); - unsigned reg1, reg2; - if ((iss >> reg1 >> std::ws)) { - if (iss.eof()) { - intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1)); - } else { - char c; - iss >> c; - if (c == '-' && (iss >> reg2)) { - intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1)); - } else { - dbgs() << "Warning: Invalid interval range \"" - << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n"; - } - } - } else { - dbgs() << "Warning: Invalid interval number \"" - << intervalRangeStr << "\" in -rmf-intervals. 
Skipping.\n"; - } - } - } - - void MFRenderingOptions::setup(MachineFunction *mf, - const TargetRegisterInfo *tri, - LiveIntervals *lis, - const RenderMachineFunction *rmf) { - this->mf = mf; - this->tri = tri; - this->lis = lis; - this->rmf = rmf; - - clear(); - } - - void MFRenderingOptions::clear() { - regClassesTranslatedToCurrentFunction = false; - regClassSet.clear(); - - intervalsTranslatedToCurrentFunction = false; - intervalSet.clear(); - } - - void MFRenderingOptions::resetRenderSpecificOptions() { - intervalSet.clear(); - intervalsTranslatedToCurrentFunction = false; - } - - bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const { - processOptions(); - - return (renderAllMFs || - mfNamesToRender.find(mf->getFunction()->getName()) != - mfNamesToRender.end()); - } - - const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{ - translateRegClassNamesToCurrentFunction(); - return regClassSet; - } - - const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const { - translateIntervalNumbersToCurrentFunction(); - return intervalSet; - } - - bool MFRenderingOptions::renderEmptyIndexes() const { - return showEmptyIndexes; - } - - bool MFRenderingOptions::fancyVerticals() const { - return useFancyVerticals; - } - - void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const { - if (!regClassesTranslatedToCurrentFunction) { - processOptions(); - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - if (renderAllClasses || - classNamesToRender.find(trc->getName()) != - classNamesToRender.end()) { - regClassSet.insert(trc); - } - } - regClassesTranslatedToCurrentFunction = true; - } - } - - void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const { - if (!intervalsTranslatedToCurrentFunction) { - processOptions(); - - // If we're not just doing explicit then do a copy over all matching - // types. - if (intervalTypesToRender != ExplicitOnly) { - for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (filterEmpty && li->empty()) - continue; - - if ((TargetRegisterInfo::isPhysicalRegister(li->reg) && - (intervalTypesToRender & AllPhys))) { - intervalSet.insert(li); - } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) { - if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) || - ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) { - intervalSet.insert(li); - } - } - } - } - - // If we need to process the explicit list... 
- if (intervalTypesToRender != All) { - for (std::set<std::pair<unsigned, unsigned> >::const_iterator - regRangeItr = intervalNumsToRender.begin(), - regRangeEnd = intervalNumsToRender.end(); - regRangeItr != regRangeEnd; ++regRangeItr) { - const std::pair<unsigned, unsigned> &range = *regRangeItr; - for (unsigned reg = range.first; reg != range.second; ++reg) { - if (lis->hasInterval(reg)) { - intervalSet.insert(&lis->getInterval(reg)); - } - } - } - } - - intervalsTranslatedToCurrentFunction = true; - } - } - - // ---------- TargetRegisterExtraInformation implementation ---------- - - TargetRegisterExtraInfo::TargetRegisterExtraInfo() - : mapsPopulated(false) { - } - - void TargetRegisterExtraInfo::setup(MachineFunction *mf, - MachineRegisterInfo *mri, - const TargetRegisterInfo *tri, - LiveIntervals *lis) { - this->mf = mf; - this->mri = mri; - this->tri = tri; - this->lis = lis; - } - - void TargetRegisterExtraInfo::reset() { - if (!mapsPopulated) { - initWorst(); - //initBounds(); - initCapacity(); - mapsPopulated = true; - } - - resetPressureAndLiveStates(); - } - - void TargetRegisterExtraInfo::clear() { - prWorst.clear(); - vrWorst.clear(); - capacityMap.clear(); - pressureMap.clear(); - //liveStatesMap.clear(); - mapsPopulated = false; - } - - void TargetRegisterExtraInfo::initWorst() { - assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && - "Worst map already initialised?"); - - // Start with the physical registers. - for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { - WorstMapLine &pregLine = prWorst[preg]; - - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - - unsigned numOverlaps = 0; - for (TargetRegisterClass::iterator rItr = trc->begin(), - rEnd = trc->end(); - rItr != rEnd; ++rItr) { - unsigned trcPReg = *rItr; - if (tri->regsOverlap(preg, trcPReg)) - ++numOverlaps; - } - - pregLine[trc] = numOverlaps; - } - } - - // Now the register classes. 
- for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rc1Itr != rcEnd; ++rc1Itr) { - const TargetRegisterClass *trc1 = *rc1Itr; - WorstMapLine &classLine = vrWorst[trc1]; - - for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin(); - rc2Itr != rcEnd; ++rc2Itr) { - const TargetRegisterClass *trc2 = *rc2Itr; - - unsigned worst = 0; - - for (TargetRegisterClass::iterator trc1Itr = trc1->begin(), - trc1End = trc1->end(); - trc1Itr != trc1End; ++trc1Itr) { - unsigned trc1Reg = *trc1Itr; - unsigned trc1RegWorst = 0; - - for (TargetRegisterClass::iterator trc2Itr = trc2->begin(), - trc2End = trc2->end(); - trc2Itr != trc2End; ++trc2Itr) { - unsigned trc2Reg = *trc2Itr; - if (tri->regsOverlap(trc1Reg, trc2Reg)) - ++trc1RegWorst; - } - if (trc1RegWorst > worst) { - worst = trc1RegWorst; - } - } - - if (worst != 0) { - classLine[trc2] = worst; - } - } - } - } - - unsigned TargetRegisterExtraInfo::getWorst( - unsigned reg, - const TargetRegisterClass *trc) const { - const WorstMapLine *wml = 0; - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - PRWorstMap::const_iterator prwItr = prWorst.find(reg); - assert(prwItr != prWorst.end() && "Missing prWorst entry."); - wml = &prwItr->second; - } else { - const TargetRegisterClass *regTRC = mri->getRegClass(reg); - VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC); - assert(vrwItr != vrWorst.end() && "Missing vrWorst entry."); - wml = &vrwItr->second; - } - - WorstMapLine::const_iterator wmlItr = wml->find(trc); - if (wmlItr == wml->end()) - return 0; - - return wmlItr->second; - } - - void TargetRegisterExtraInfo::initCapacity() { - assert(!mapsPopulated && capacityMap.empty() && - "Capacity map already initialised?"); - - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - unsigned capacity = trc->getRawAllocationOrder(*mf).size(); - - if (capacity != 0) - capacityMap[trc] = capacity; - } - } - - unsigned TargetRegisterExtraInfo::getCapacity( - const TargetRegisterClass *trc) const { - CapacityMap::const_iterator cmItr = capacityMap.find(trc); - assert(cmItr != capacityMap.end() && - "vreg with unallocable register class"); - return cmItr->second; - } - - void TargetRegisterExtraInfo::resetPressureAndLiveStates() { - pressureMap.clear(); - //liveStatesMap.clear(); - - // Iterate over all slots. - - - // Iterate over all live intervals. - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) - continue; - - // For all ranges in the current interval. - for (LiveInterval::iterator lrItr = li->begin(), - lrEnd = li->end(); - lrItr != lrEnd; ++lrItr) { - LiveRange *lr = &*lrItr; - - // For all slots in the current range. - for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) { - - // Record increased pressure at index for all overlapping classes. 
- for (TargetRegisterInfo::regclass_iterator - rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - - if (trc->getRawAllocationOrder(*mf).empty()) - continue; - - unsigned worstAtI = getWorst(li->reg, trc); - - if (worstAtI != 0) { - pressureMap[i][trc] += worstAtI; - } - } - } - } - } - } - - unsigned TargetRegisterExtraInfo::getPressureAtSlot( - const TargetRegisterClass *trc, - SlotIndex i) const { - PressureMap::const_iterator pmItr = pressureMap.find(i); - if (pmItr == pressureMap.end()) - return 0; - const PressureMapLine &pmLine = pmItr->second; - PressureMapLine::const_iterator pmlItr = pmLine.find(trc); - if (pmlItr == pmLine.end()) - return 0; - return pmlItr->second; - } - - bool TargetRegisterExtraInfo::classOverCapacityAtSlot( - const TargetRegisterClass *trc, - SlotIndex i) const { - return (getPressureAtSlot(trc, i) > getCapacity(trc)); - } - - // ---------- MachineFunctionRenderer implementation ---------- - - void RenderMachineFunction::Spacer::print(raw_ostream &os) const { - if (!prettyHTML) - return; - for (unsigned i = 0; i < ns; ++i) { - os << " "; - } - } - - RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const { - return Spacer(ns); - } - - raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) { - s.print(os); - return os; - } - - template <typename Iterator> - std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const { - std::string r; - - for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) { - char c = *sItr; - - switch (c) { - case '<': r.append("&lt;"); break; - case '>': r.append("&gt;"); break; - case '&': r.append("&amp;"); break; - case ' ': r.append("&nbsp;"); break; - case '\"': r.append("&quot;"); break; - default: r.push_back(c); break; - } - } - - return r; - } - - RenderMachineFunction::LiveState - RenderMachineFunction::getLiveStateAt(const LiveInterval *li, - SlotIndex i) const { - const MachineInstr *mi = sis->getInstructionFromIndex(i); - - // For uses/defs recorded use/def indexes override current liveness and - // instruction operands (Only for the interval which records the indexes). - // FIXME: This is all wrong, uses and defs share the same slots. - if (i.isEarlyClobber() || i.isRegister()) { - UseDefs::const_iterator udItr = useDefs.find(li); - if (udItr != useDefs.end()) { - const SlotSet &slotSet = udItr->second; - if (slotSet.count(i)) { - if (i.isEarlyClobber()) { - return Used; - } - // else - return Defined; - } - } - } - - // If the slot is a load/store, or there's no info in the use/def set then - // use liveness and instruction operand info. - if (li->liveAt(i)) { - - if (mi == 0) { - if (vrm == 0 || - (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) { - return AliveReg; - } else { - return AliveStack; - } - } else { - if (i.isRegister() && mi->definesRegister(li->reg, tri)) { - return Defined; - } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { - return Used; - } else { - if (vrm == 0 || - (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) { - return AliveReg; - } else { - return AliveStack; - } - } - } - } - return Dead; - } - - RenderMachineFunction::PressureState - RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc, - SlotIndex i) const { - if (trei.getPressureAtSlot(trc, i) == 0) { - return Zero; - } else if (trei.classOverCapacityAtSlot(trc, i)){ - return High; - } - return Low; - } - - /// \brief Render a machine instruction. 
- void RenderMachineFunction::renderMachineInstr(raw_ostream &os, - const MachineInstr *mi) const { - std::string s; - raw_string_ostream oss(s); - oss << *mi; - - os << escapeChars(oss.str()); - } - - template <typename T> - void RenderMachineFunction::renderVertical(const Spacer &indent, - raw_ostream &os, - const T &t) const { - if (ro.fancyVerticals()) { - os << indent << "<object\n" - << indent + s(2) << "class=\"obj\"\n" - << indent + s(2) << "type=\"image/svg+xml\"\n" - << indent + s(2) << "width=\"14px\"\n" - << indent + s(2) << "height=\"55px\"\n" - << indent + s(2) << "data=\"data:image/svg+xml,\n" - << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n" - << indent + s(6) << "<text x='-55' y='10' " - "font-family='Courier' font-size='12' " - "transform='rotate(-90)' " - "text-rendering='optimizeSpeed' " - "fill='#000'>" << t << "</text>\n" - << indent + s(4) << "</svg>\">\n" - << indent << "</object>\n"; - } else { - std::ostringstream oss; - oss << t; - std::string tStr(oss.str()); - - os << indent; - for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); - tStrItr != tStrEnd; ++tStrItr) { - os << *tStrItr << "<br/>"; - } - os << "\n"; - } - } - - void RenderMachineFunction::insertCSS(const Spacer &indent, - raw_ostream &os) const { - os << indent << "<style type=\"text/css\">\n" - << indent + s(2) << "body { font-color: black; }\n" - << indent + s(2) << "table.code td { font-family: monospace; " - "border-width: 0px; border-style: solid; " - "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n" - << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n" - << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n" - << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n" - << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n" - << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n" - << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n" - << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n" - << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n" - << indent + s(2) << "table.code th { border-width: 0px; " - "border-style: solid; }\n" - << indent << "</style>\n"; - } - - void RenderMachineFunction::renderFunctionSummary( - const Spacer &indent, raw_ostream &os, - const char * const renderContextStr) const { - os << indent << "<h1>Function: " << mf->getFunction()->getName() - << "</h1>\n" - << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n"; - } - - - void RenderMachineFunction::renderPressureTableLegend( - const Spacer &indent, - raw_ostream &os) const { - os << indent << "<h2>Rendering Pressure Legend:</h2>\n" - << indent << "<table class=\"code\">\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<th>Pressure</th><th>Description</th>" - "<th>Appearance</th>\n" - << indent + s(2) << "</tr>\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<td>No Pressure</td>" - "<td>No physical registers of this class requested.</td>" - "<td class=\"p-z\">&nbsp;</td>\n" - << indent + s(2) << "</tr>\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<td>Low Pressure</td>" - "<td>Sufficient physical registers to meet demand.</td>" - "<td class=\"p-l\">&nbsp;</td>\n" - << indent + s(2) << "</tr>\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<td>High Pressure</td>" - "<td>Potentially insufficient physical registers to meet demand.</td>" - "<td class=\"p-h\">&nbsp;</td>\n" - << indent + 
s(2) << "</tr>\n" - << indent << "</table>\n"; - } - - template <typename CellType> - void RenderMachineFunction::renderCellsWithRLE( - const Spacer &indent, raw_ostream &os, - const std::pair<CellType, unsigned> &rleAccumulator, - const std::map<CellType, std::string> &cellTypeStrs) const { - - if (rleAccumulator.second == 0) - return; - - typename std::map<CellType, std::string>::const_iterator ctsItr = - cellTypeStrs.find(rleAccumulator.first); - - assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); - - os << indent + s(4) << "<td class=\"" << ctsItr->second << "\""; - if (rleAccumulator.second > 1) - os << " colspan=" << rleAccumulator.second; - os << "></td>\n"; - } - - - void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, - raw_ostream &os) const { - - std::map<LiveState, std::string> lsStrs; - lsStrs[Dead] = "l-n"; - lsStrs[Defined] = "l-d"; - lsStrs[Used] = "l-u"; - lsStrs[AliveReg] = "l-r"; - lsStrs[AliveStack] = "l-s"; - - std::map<PressureState, std::string> psStrs; - psStrs[Zero] = "p-z"; - psStrs[Low] = "p-l"; - psStrs[High] = "p-h"; - - // Open the table... - - os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n" - << indent + s(2) << "<tr>\n"; - - // Render the header row... - - os << indent + s(4) << "<th>index</th>\n" - << indent + s(4) << "<th>instr</th>\n"; - - // Render class names if necessary... - if (!ro.regClasses().empty()) { - for (MFRenderingOptions::RegClassSet::const_iterator - rcItr = ro.regClasses().begin(), - rcEnd = ro.regClasses().end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - os << indent + s(4) << "<th>\n"; - renderVertical(indent + s(6), os, trc->getName()); - os << indent + s(4) << "</th>\n"; - } - } - - // FIXME: Is there a nicer way to insert space between columns in HTML? - if (!ro.regClasses().empty() && !ro.intervals().empty()) - os << indent + s(4) << "<th> </th>\n"; - - // Render interval numbers if necessary... - if (!ro.intervals().empty()) { - for (MFRenderingOptions::IntervalSet::const_iterator - liItr = ro.intervals().begin(), - liEnd = ro.intervals().end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = *liItr; - os << indent + s(4) << "<th>\n"; - renderVertical(indent + s(6), os, li->reg); - os << indent + s(4) << "</th>\n"; - } - } - - os << indent + s(2) << "</tr>\n"; - - // End header row, start with the data rows... - - MachineInstr *mi = 0; - - // Data rows: - for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); - i = i.getNextSlot()) { - - // Render the slot column. - os << indent + s(2) << "<tr height=6ex>\n"; - - // Render the code column. - if (i.isBlock()) { - MachineBasicBlock *mbb = sis->getMBBFromIndex(i); - mi = sis->getInstructionFromIndex(i); - - if (i == sis->getMBBStartIdx(mbb) || mi != 0 || - ro.renderEmptyIndexes()) { - os << indent + s(4) << "<td rowspan=4>" << i << " </td>\n" - << indent + s(4) << "<td rowspan=4>\n"; - - if (i == sis->getMBBStartIdx(mbb)) { - os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; - } else if (mi != 0) { - os << indent + s(6) << " "; - renderMachineInstr(os, mi); - } else { - // Empty interval - leave blank. - } - os << indent + s(4) << "</td>\n"; - } else { - i = i.getDeadSlot(); // <- Will be incremented to the next index. - continue; - } - } - - // Render the class columns. 
- if (!ro.regClasses().empty()) { - std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0); - for (MFRenderingOptions::RegClassSet::const_iterator - rcItr = ro.regClasses().begin(), - rcEnd = ro.regClasses().end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - PressureState newPressure = getPressureStateAt(trc, i); - - if (newPressure == psRLEAccumulator.first) { - ++psRLEAccumulator.second; - } else { - renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); - psRLEAccumulator.first = newPressure; - psRLEAccumulator.second = 1; - } - } - renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); - } - - // FIXME: Is there a nicer way to insert space between columns in HTML? - if (!ro.regClasses().empty() && !ro.intervals().empty()) - os << indent + s(4) << "<td width=2em></td>\n"; - - if (!ro.intervals().empty()) { - std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0); - for (MFRenderingOptions::IntervalSet::const_iterator - liItr = ro.intervals().begin(), - liEnd = ro.intervals().end(); - liItr != liEnd; ++liItr) { - const LiveInterval *li = *liItr; - LiveState newLiveness = getLiveStateAt(li, i); - - if (newLiveness == lsRLEAccumulator.first) { - ++lsRLEAccumulator.second; - } else { - renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); - lsRLEAccumulator.first = newLiveness; - lsRLEAccumulator.second = 1; - } - } - renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); - } - os << indent + s(2) << "</tr>\n"; - } - - os << indent << "</table>\n"; - - if (!ro.regClasses().empty()) - renderPressureTableLegend(indent, os); - } - - void RenderMachineFunction::renderFunctionPage( - raw_ostream &os, - const char * const renderContextStr) const { - os << "<html>\n" - << s(2) << "<head>\n" - << s(4) << "<title>" << fqn << "</title>\n"; - - insertCSS(s(4), os); - - os << s(2) << "</head>\n" - << s(2) << "<body>\n"; - - renderFunctionSummary(s(4), os, renderContextStr); - - os << s(4) << "<br/><br/><br/>\n"; - - //renderLiveIntervalInfoTable(" ", os); - - os << s(4) << "<br/><br/><br/>\n"; - - renderCodeTablePlusPI(s(4), os); - - os << s(2) << "</body>\n" - << "</html>\n"; - } - - void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tri = mf->getTarget().getRegisterInfo(); - lis = &getAnalysis<LiveIntervals>(); - sis = &getAnalysis<SlotIndexes>(); - - trei.setup(mf, mri, tri, lis); - ro.setup(mf, tri, lis, this); - spillIntervals.clear(); - spillFor.clear(); - useDefs.clear(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." 
+ - mf->getFunction()->getName().str(); - - return false; - } - - void RenderMachineFunction::releaseMemory() { - trei.clear(); - ro.clear(); - spillIntervals.clear(); - spillFor.clear(); - useDefs.clear(); - } - - void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { - - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - const MachineInstr *mi = &*rItr; - if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); - } - if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); - } - } - } - - void RenderMachineFunction::rememberSpills( - const LiveInterval *li, - const std::vector<LiveInterval*> &spills) { - - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(), - siEnd = spills.end(); - siItr != siEnd; ++siItr) { - const LiveInterval *spill = *siItr; - spillIntervals[li].insert(spill); - spillFor[spill] = li; - } - } - - bool RenderMachineFunction::isSpill(const LiveInterval *li) const { - SpillForMap::const_iterator sfItr = spillFor.find(li); - if (sfItr == spillFor.end()) - return false; - return true; - } - - void RenderMachineFunction::renderMachineFunction( - const char *renderContextStr, - const VirtRegMap *vrm, - const char *renderSuffix) { - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - this->vrm = vrm; - trei.reset(); - - std::string rpFileName(mf->getFunction()->getName().str() + - (renderSuffix ? renderSuffix : "") + - outputFileSuffix); - - std::string errMsg; - raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); - - renderFunctionPage(outFile, renderContextStr); - - ro.resetRenderSpecificOptions(); - } - - std::string RenderMachineFunction::escapeChars(const std::string &s) const { - return escapeChars(s.begin(), s.end()); - } - -} diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h deleted file mode 100644 index 8571992..0000000 --- a/lib/CodeGen/RenderMachineFunction.h +++ /dev/null @@ -1,338 +0,0 @@ -//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H -#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H - -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -#include <algorithm> -#include <map> -#include <set> -#include <string> - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - class MachineInstr; - class MachineRegisterInfo; - class RenderMachineFunction; - class TargetRegisterClass; - class TargetRegisterInfo; - class VirtRegMap; - class raw_ostream; - - /// \brief Helper class to process rendering options. Tries to be as lazy as - /// possible. 
- class MFRenderingOptions { - public: - - struct RegClassComp { - bool operator()(const TargetRegisterClass *trc1, - const TargetRegisterClass *trc2) const { - std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); - return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), - trc2Name.begin(), trc2Name.end()); - } - }; - - typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet; - - struct IntervalComp { - bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { - return li1->reg < li2->reg; - } - }; - - typedef std::set<const LiveInterval*, IntervalComp> IntervalSet; - - /// Initialise the rendering options. - void setup(MachineFunction *mf, const TargetRegisterInfo *tri, - LiveIntervals *lis, const RenderMachineFunction *rmf); - - /// Clear translations of options to the current function. - void clear(); - - /// Reset any options computed for this specific rendering. - void resetRenderSpecificOptions(); - - /// Should we render the current function. - bool shouldRenderCurrentMachineFunction() const; - - /// Return the set of register classes to render pressure for. - const RegClassSet& regClasses() const; - - /// Return the set of live intervals to render liveness for. - const IntervalSet& intervals() const; - - /// Render indexes which are not associated with instructions / MBB starts. - bool renderEmptyIndexes() const; - - /// Return whether or not to render using SVG for fancy vertical text. - bool fancyVerticals() const; - - private: - - static bool renderingOptionsProcessed; - static std::set<std::string> mfNamesToRender; - static bool renderAllMFs; - - static std::set<std::string> classNamesToRender; - static bool renderAllClasses; - - - static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender; - typedef enum { ExplicitOnly = 0, - AllPhys = 1, - VirtNoSpills = 2, - VirtSpills = 4, - AllVirt = 6, - All = 7 } - IntervalTypesToRender; - static unsigned intervalTypesToRender; - - template <typename OutputItr> - static void splitComaSeperatedList(const std::string &s, OutputItr outItr); - - static void processOptions(); - - static void processFuncNames(); - static void processRegClassNames(); - static void processIntervalNumbers(); - - static void processIntervalRange(const std::string &intervalRangeStr); - - MachineFunction *mf; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - const RenderMachineFunction *rmf; - - mutable bool regClassesTranslatedToCurrentFunction; - mutable RegClassSet regClassSet; - - mutable bool intervalsTranslatedToCurrentFunction; - mutable IntervalSet intervalSet; - - void translateRegClassNamesToCurrentFunction() const; - - void translateIntervalNumbersToCurrentFunction() const; - }; - - /// \brief Provide extra information about the physical and virtual registers - /// in the function being compiled. - class TargetRegisterExtraInfo { - public: - TargetRegisterExtraInfo(); - - /// \brief Set up TargetRegisterExtraInfo with pointers to necessary - /// sources of information. - void setup(MachineFunction *mf, MachineRegisterInfo *mri, - const TargetRegisterInfo *tri, LiveIntervals *lis); - - /// \brief Recompute tables for changed function. - void reset(); - - /// \brief Free all tables in TargetRegisterExtraInfo. - void clear(); - - /// \brief Maximum number of registers from trc which alias reg. - unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; - - /// \brief Returns the number of allocable registers in trc. 
- unsigned getCapacity(const TargetRegisterClass *trc) const; - - /// \brief Return the number of registers of class trc that may be - /// needed at slot i. - unsigned getPressureAtSlot(const TargetRegisterClass *trc, - SlotIndex i) const; - - /// \brief Return true if the number of registers of type trc that may be - /// needed at slot i is greater than the capacity of trc. - bool classOverCapacityAtSlot(const TargetRegisterClass *trc, - SlotIndex i) const; - - private: - - MachineFunction *mf; - MachineRegisterInfo *mri; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - - typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine; - typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap; - VRWorstMap vrWorst; - - typedef std::map<unsigned, WorstMapLine> PRWorstMap; - PRWorstMap prWorst; - - typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap; - CapacityMap capacityMap; - - typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine; - typedef std::map<SlotIndex, PressureMapLine> PressureMap; - PressureMap pressureMap; - - bool mapsPopulated; - - /// \brief Initialise the 'worst' table. - void initWorst(); - - /// \brief Initialise the 'capacity' table. - void initCapacity(); - - /// \brief Initialise/Reset the 'pressure' and live states tables. - void resetPressureAndLiveStates(); - }; - - /// \brief Render MachineFunction objects and related information to an HTML - /// page. - class RenderMachineFunction : public MachineFunctionPass { - public: - static char ID; - - RenderMachineFunction() : MachineFunctionPass(ID) { - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - void rememberUseDefs(const LiveInterval *li); - - void rememberSpills(const LiveInterval *li, - const std::vector<LiveInterval*> &spills); - - bool isSpill(const LiveInterval *li) const; - - /// \brief Render this machine function to HTML. - /// - /// @param renderContextStr This parameter will be included in the top of - /// the html file to explain where (in the - /// codegen pipeline) this function was rendered - /// from. Set it to something like - /// "Pre-register-allocation". - /// @param vrm If non-null the VRM will be queried to determine - /// whether a virtual register was allocated to a - /// physical register or spilled. - /// @param renderSuffix This string will be appended to the function - /// name (before the output file suffix) to enable - /// multiple renderings from the same function. - void renderMachineFunction(const char *renderContextStr, - const VirtRegMap *vrm = 0, - const char *renderSuffix = 0); - - private: - class Spacer; - friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); - - std::string fqn; - - MachineFunction *mf; - MachineRegisterInfo *mri; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - SlotIndexes *sis; - const VirtRegMap *vrm; - - TargetRegisterExtraInfo trei; - MFRenderingOptions ro; - - - - // Utilities. 
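// A minimal usage sketch (hypothetical call sites, not part of the original
// header): a register allocator holding this analysis might render before
// and after allocation roughly like so:
//
//   RenderMachineFunction *rmf = &getAnalysis<RenderMachineFunction>();
//   rmf->renderMachineFunction("Pre-register-allocation");
//   // ...allocate, populating the VirtRegMap vrm...
//   rmf->renderMachineFunction("Post-register-allocation", &vrm, ".post");
//
// Passing the VirtRegMap lets getLiveStateAt() distinguish values kept in
// registers from values spilled to stack slots, and the suffix keeps the two
// HTML dumps of the same function distinct.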
- typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; - LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; - - typedef enum { Zero, Low, High } PressureState; - PressureState getPressureStateAt(const TargetRegisterClass *trc, - SlotIndex i) const; - - typedef std::map<const LiveInterval*, std::set<const LiveInterval*> > - SpillIntervals; - SpillIntervals spillIntervals; - - typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap; - SpillForMap spillFor; - - typedef std::set<SlotIndex> SlotSet; - typedef std::map<const LiveInterval*, SlotSet> UseDefs; - UseDefs useDefs; - - // ---------- Rendering methods ---------- - - /// For inserting spaces when pretty printing. - class Spacer { - public: - explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} - Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } - void print(raw_ostream &os) const; - private: - unsigned ns; - }; - - Spacer s(unsigned ns) const; - - template <typename Iterator> - std::string escapeChars(Iterator sBegin, Iterator sEnd) const; - - /// \brief Render a machine instruction. - void renderMachineInstr(raw_ostream &os, - const MachineInstr *mi) const; - - /// \brief Render vertical text. - template <typename T> - void renderVertical(const Spacer &indent, - raw_ostream &os, - const T &t) const; - - /// \brief Insert CSS layout info. - void insertCSS(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render a brief summary of the function (including rendering - /// context). - void renderFunctionSummary(const Spacer &indent, - raw_ostream &os, - const char * const renderContextStr) const; - - /// \brief Render a legend for the pressure table. - void renderPressureTableLegend(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render a consecutive set of HTML cells of the same class using - /// the colspan attribute for run-length encoding. - template <typename CellType> - void renderCellsWithRLE( - const Spacer &indent, raw_ostream &os, - const std::pair<CellType, unsigned> &rleAccumulator, - const std::map<CellType, std::string> &cellTypeStrs) const; - - /// \brief Render code listing, potentially with register pressure - /// and live intervals shown alongside. - void renderCodeTablePlusPI(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render the HTML page representing the MachineFunction. - void renderFunctionPage(raw_ostream &os, - const char * const renderContextStr) const; - - std::string escapeChars(const std::string &s) const; - }; -} - -#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 8fd6426..752f8e4 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -64,10 +64,27 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// specified node. bool SUnit::addPred(const SDep &D) { // If this node already has this dependence, don't add a redundant one. - for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) - if (*I == D) + for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + if (I->overlaps(D)) { + // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). + if (I->getLatency() < D.getLatency()) { + SUnit *PredSU = I->getSUnit(); + // Find the corresponding successor in N. 
+ SDep ForwardD = *I; + ForwardD.setSUnit(this); + for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(), + EE = PredSU->Succs.end(); II != EE; ++II) { + if (*II == ForwardD) { + II->setLatency(D.getLatency()); + break; + } + } + I->setLatency(D.getLatency()); + } return false; + } + } // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index d46eb89..9c1dba3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,17 +21,24 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" using namespace llvm; +static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Enable use of AA during MI DAG construction")); + ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt, @@ -40,7 +47,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false), - LoopRegs(MLI, MDT), FirstDbgValue(0) { + LoopRegs(MDT), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -126,7 +133,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { + BB = bb; LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) if (BB == ML->getLoopLatch()) @@ -134,7 +142,8 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { } void ScheduleDAGInstrs::finishBlock() { - // Nothing to do. + // Subclasses should no longer refer to the old block. + BB = 0; } /// Initialize the map with the number of registers. @@ -159,7 +168,7 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned endcount) { - BB = bb; + assert(bb == BB && "startBlock should set BB"); RegionBegin = begin; RegionEnd = end; EndIndex = endcount; @@ -232,7 +241,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); unsigned DataLatency = SU->Latency; - for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; std::vector<SUnit*> &UseList = Uses[*Alias]; @@ -261,10 +271,12 @@ // Adjust the dependence latency using operand def/use // information (if any), and then allow the target to // perform its own adjustments. 
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + SDep dep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); + unsigned Latency = computeOperandLatency(SU, UseSU, dep); + dep.setLatency(Latency); + + ST.adjustSchedDependency(SU, UseSU, dep); } UseSU->addPred(dep); } @@ -285,7 +297,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // TODO: Using a latency of 1 here for output dependencies assumes // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; std::vector<SUnit *> &DefList = Defs[*Alias]; @@ -398,9 +411,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); - // SSA defs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) return; // Add output dependence to the next nearest def of this vreg. @@ -410,7 +424,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. - VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); if (DefI == VRegDefs.end()) VRegDefs.insert(VReg2SUnit(Reg, SU)); else { @@ -436,10 +450,11 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(); - LiveInterval *LI = &LIS->getInterval(Reg); - VNInfo *VNI = LI->getVNInfoBefore(UseIdx); + LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + VNInfo *VNI = LRQ.valueIn(); + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + assert(VNI && "No value to read by operand"); MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); // Phis and other noninstructions (after coalescing) have a NULL Def. if (Def) { @@ -449,11 +464,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Create a data dependence. // // TODO: Handle "special" address latencies cleanly. - const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); + SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg); if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. 
- computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + dep.setLatency(Latency); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); } @@ -462,11 +479,217 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { } // Add antidependence to the following def of the vreg it uses. - VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); if (DefI != VRegDefs.end() && DefI->SU != SU) DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); } +/// Return true if MI is an instruction we are unable to reason about +/// (like a call or something with unmodeled side effects). +static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { + if (MI->isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasVolatileMemoryRef() && + (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) + return true; + return false; +} + +// This MI might have either incomplete info, or be known to be unsafe +// to deal with (i.e. volatile object). +static inline bool isUnsafeMemoryObject(MachineInstr *MI, + const MachineFrameInfo *MFI) { + if (!MI || MI->memoperands_empty()) + return true; + // We purposefully do not check for hasOneMemOperand() here + // in the hope of triggering an assert downstream in order to + // finish the implementation. + if ((*MI->memoperands_begin())->isVolatile() || + MI->hasUnmodeledSideEffects()) + return true; + + const Value *V = (*MI->memoperands_begin())->getValue(); + if (!V) + return true; + + V = getUnderlyingObject(V); + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // Similarly to getUnderlyingObjectForInstr: + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (PSV->isAliased(MFI)) + return true; + } + // Does this pointer refer to a distinct and identifiable object? + if (!isIdentifiedObject(V)) + return true; + + return false; +} + +/// This returns true if the two MIs need a chain edge between them. +/// If these are not even memory operations, we still may need +/// chain deps between them. The question really is - could +/// these two MIs be reordered during scheduling from memory dependency +/// point of view. +static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, + MachineInstr *MIa, + MachineInstr *MIb) { + // Cover a trivial case - no edge is needed to itself. + if (MIa == MIb) + return false; + + if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) + return true; + + // If we are dealing with two "normal" loads, we do not need an edge + // between them - they could be reordered. + if (!MIa->mayStore() && !MIb->mayStore()) + return false; + + // To this point analysis is generic. From here on we do need AA. + if (!AA) + return true; + + MachineMemOperand *MMOa = *MIa->memoperands_begin(); + MachineMemOperand *MMOb = *MIb->memoperands_begin(); + + // FIXME: Need to handle multiple memory operands to support all targets. + if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) + llvm_unreachable("Multiple memory operands."); + + // The following interface to AA is fashioned after DAGCombiner::isAlias + // and operates with MachineMemOperand offset with some important + // assumptions: + // - LLVM fundamentally assumes flat address spaces. 
- MachineOperand offset can *only* result from legalization and + // cannot affect queries other than the trivial case of overlap + // checking. + // - These offsets never wrap and never step outside + // of allocated objects. + // - There should never be any negative offsets here. + // + // FIXME: Modify API to hide this math from "user" + // FIXME: Even before we go to AA we can reason locally about some + // memory objects. It can save compile time, and possibly catch some + // corner cases not currently covered. + + assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + + int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); + int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; + int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + + AliasAnalysis::AliasResult AAResult = AA->alias( + AliasAnalysis::Location(MMOa->getValue(), Overlapa, + MMOa->getTBAAInfo()), + AliasAnalysis::Location(MMOb->getValue(), Overlapb, + MMOb->getTBAAInfo())); + + return (AAResult != AliasAnalysis::NoAlias); +} + +/// This recursive function iterates over chain deps of SUb looking for +/// "latest" node that needs a chain edge to SUa. +static unsigned +iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, + SmallPtrSet<const SUnit*, 16> &Visited) { + if (!SUa || !SUb || SUb == ExitSU) + return *Depth; + + // Remember visited nodes. + if (!Visited.insert(SUb)) + return *Depth; + // If there is _some_ dependency already in place, do not + // descend any further. + // TODO: Need to make sure that if that dependency got eliminated or ignored + // for any reason in the future, we would not violate DAG topology. + // Currently it does not happen, but makes an implicit assumption about + // future implementation. + // + // Independently, if we encounter a node that is some sort of global + // object (like a call) we already have full set of dependencies to it + // and we can stop descending. + if (SUa->isSucc(SUb) || + isGlobalMemoryObject(AA, SUb->getInstr())) + return *Depth; + + // If we do need an edge, or we have exceeded the depth budget, + // add that edge to the predecessors chain of SUb, + // and stop descending. + if (*Depth > 200 || + MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0, + /*isNormalMemory=*/true)); + return *Depth; + } + // Track current depth. + (*Depth)++; + // Iterate over chain dependencies only. + for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); + return *Depth; +} + +/// This function assumes that "downward" from SU there exists a +/// tail/leaf of the already constructed DAG. It iterates downward and +/// checks whether SU can be aliasing any node dominated +/// by it. +static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, + SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList, + unsigned LatencyToLoad) { + if (!SU) + return; + + SmallPtrSet<const SUnit*, 16> Visited; + unsigned Depth = 0; + + for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end(); + I != IE; ++I) { + if (SU == *I) + continue; + if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { + unsigned Latency = ((*I)->getInstr()->mayLoad()) ? 
LatencyToLoad : 0; + (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, + /*isNormalMemory=*/true)); + } + // Now go through all the chain successors and iterate from them. + // Keep track of visited nodes. + for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), + JE = (*I)->Succs.end(); J != JE; ++J) + if (J->isCtrl()) + iterateChainSucc (AA, MFI, SU, J->getSUnit(), + ExitSU, &Depth, Visited); + } +} + +/// Check whether two objects need a chain edge; if so, add it, +/// otherwise remember the rejected SU. +static inline +void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, + SUnit *SUa, SUnit *SUb, + std::set<SUnit *> &RejectList, + unsigned TrueMemOrderLatency = 0, + bool isNormalMemory = false) { + // If this is a false dependency, + // do not add the edge, but remember the rejected node. + if (!EnableAASchedMI || + MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) + SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0, + isNormalMemory)); + else { + // Duplicate entries should be ignored. + RejectList.insert(SUb); + DEBUG(dbgs() << "\tReject chain dep between SU(" + << SUa->NodeNum << ") and SU(" + << SUb->NodeNum << ")\n"); + } +} + /// Create an SUnit for each real instruction, numbered in top-down topological /// order. The instruction order A < B implies that no edge exists from B to A. /// @@ -502,7 +725,11 @@ void ScheduleDAGInstrs::initSUnits() { } } -void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { +/// If RegPressure is non null, compute register pressure as a side effect. The +/// DAG builder is an efficient place to do it because it already visits +/// operands. +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, + RegPressureTracker *RPTracker) { // Create an SUnit for each real instruction. initSUnits(); @@ -518,6 +745,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { // that are known not to alias std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; + std::set<SUnit*> RejectMemNodes; // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. @@ -553,6 +781,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { PrevMI = MI; continue; } + if (RPTracker) { + RPTracker->recede(); + assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); + } assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && "Cannot schedule terminators or labels!"); @@ -587,11 +819,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { // after stack slots are lowered to actual addresses. // TODO: Use an AliasAnalysis and do real alias-analysis queries, and // produce more precise dependence information. -#define STORE_LOAD_LATENCY 1 - unsigned TrueMemOrderLatency = 0; - if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasVolatileMemoryRef() && - (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) { + unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0; + if (isGlobalMemoryObject(AA, MI)) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
for (std::map<const Value *, SUnit *>::iterator I = @@ -603,36 +832,48 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); } - NonAliasMemDefs.clear(); - NonAliasMemUses.clear(); // Add SU to the barrier chain. if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); BarrierChain = SU; + // This is a barrier event that acts as a pivotal node in the DAG, + // so it is safe to clear the list of exposed nodes. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + RejectMemNodes.clear(); + NonAliasMemDefs.clear(); + NonAliasMemUses.clear(); // fall-through new_alias_chain: // Chain all possibly aliasing memory references through SU. - if (AliasChain) - AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (AliasChain) { + unsigned ChainLatency = 0; + if (AliasChain->getInstr()->mayLoad()) + ChainLatency = TrueMemOrderLatency; + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + ChainLatency); + } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), - E = AliasMemDefs.end(); I != E; ++I) { - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - } + E = AliasMemDefs.end(); I != E; ++I) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); for (std::map<const Value *, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + TrueMemOrderLatency); } + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { bool MayAlias = true; - TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is @@ -642,8 +883,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, SUnit *>::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, + 0, true); I->second = SU; } else { if (MayAlias) @@ -658,20 +899,28 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency, - /*Reg=*/0, /*isNormalMemory=*/true)); + addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + TrueMemOrderLatency, true); J->second.clear(); } if (MayAlias) { // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. 
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + // But we should also check dependent instructions for the + // SU in question. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); } // Add dependence on barrier chain, if needed. + // There is no point in checking aliasing on a barrier event. Even if + // SU and barrier _could_ be reordered, they should not. In addition, + // we have lost all RejectMemNodes below barrier. if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } else { @@ -688,7 +937,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { /*isArtificial=*/true)); } else if (MI->mayLoad()) { bool MayAlias = true; - TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { @@ -700,8 +948,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, SUnit *>::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); if (MayAlias) AliasMemUses[V].push_back(SU); else @@ -711,15 +958,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { // potentially aliasing stores. for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; } - + if (MayAlias) + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } @@ -735,8 +983,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { } void ScheduleDAGInstrs::computeLatency(SUnit *SU) { - // Compute the latency for the node. - if (!InstrItins || InstrItins->isEmpty()) { + // Compute the latency for the node. We only provide a default for missing + // itineraries. Empty itineraries still have latency properties. + if (!InstrItins) { SU->Latency = 1; // Simplistic target-independent heuristic: assume that loads take - // two cycles. ... } } -void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const { - if (!InstrItins || InstrItins->isEmpty()) - return; - +unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, + const SDep& dep, + bool FindMin) const { // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return; - - const unsigned Reg = dep.getReg(); - - // ... 
find the definition of the register in the defining - // instruction - MachineInstr *DefMI = Def->getInstr(); - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> - // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - MachineInstr *UseMI = Use->getInstr(); - // For all uses of the register, calculate the maxmimum latency - int Latency = -1; - if (UseMI) { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, - UseMI, i); - Latency = std::max(Latency, UseCycle); - } - } else { - // UseMI is null, then it must be a scheduling barrier. - if (!InstrItins || InstrItins->isEmpty()) - return; - unsigned DefClass = DefMI->getDesc().getSchedClass(); - Latency = InstrItins->getOperandCycle(DefClass, DefIdx); - } + return 1; - // If we found a latency, then replace the existing dependence latency. - if (Latency >= 0) - dep.setLatency(Latency); - } + return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), + Use->getInstr(), dep.getReg(), FindMin); } void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 3d22035..e675366 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -39,13 +39,11 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, DebugType = ParentDebugType; #endif - // Determine the maximum depth of any itinerary. This determines the - // depth of the scoreboard. We always make the scoreboard at least 1 - // cycle deep to avoid dealing with the boundary condition. + // Determine the maximum depth of any itinerary. This determines the depth of + // the scoreboard. We always make the scoreboard at least 1 cycle deep to + // avoid dealing with the boundary condition. unsigned ScoreboardDepth = 1; if (ItinData && !ItinData->isEmpty()) { - IssueWidth = ItinData->IssueWidth; - for (unsigned idx = 0; ; ++idx) { if (ItinData->isEndMarker(idx)) break; @@ -63,16 +61,26 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, // Find the next power-of-2 >= ItinDepth while (ItinDepth > ScoreboardDepth) { ScoreboardDepth *= 2; + // Don't set MaxLookAhead until we find at least one nonzero stage. + // This way, an itinerary with no stages has MaxLookAhead==0, which + // completely bypasses the scoreboard hazard logic. + MaxLookAhead = ScoreboardDepth; } } - MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); RequiredScoreboard.reset(ScoreboardDepth); - DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " - << ScoreboardDepth << '\n'); + // If MaxLookAhead is not set above, then we are not enabled. 
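The scoreboard depth computation above rounds the deepest itinerary stage up to a power of two, and MaxLookAhead is now only assigned once a nonzero stage has been seen, so a stageless itinerary leaves MaxLookAhead at 0 and the recognizer disabled, as the isEnabled check just below confirms. A small standalone sketch of the rounding (not the LLVM code itself):

#include <cstdio>

// Round the deepest itinerary stage up to the next power of two, as the
// scoreboard-depth loop in the hunk does.
static unsigned roundUpPow2(unsigned Depth) {
  unsigned P = 1;
  while (P < Depth)
    P *= 2;
  return P;
}

int main() {
  // An itinerary whose deepest stage ends at cycle 13 gets a 16-deep board.
  std::printf("%u\n", roundUpPow2(13)); // 16
}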
+ if (!isEnabled()) + DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); + else { + // A nonempty itinerary must have a SchedModel. + IssueWidth = ItinData->SchedModel->IssueWidth; + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " + << ScoreboardDepth << '\n'); + } } void ScoreboardHazardRecognizer::Reset() { @@ -151,7 +159,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { } if (!freeUnits) { - DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); DEBUG(DAG->dumpNode(SU)); return Hazard; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index a6bdc3b..75e8167 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_library(LLVMSelectionDAG TargetLowering.cpp TargetSelectionDAGInfo.cpp ) + +add_dependencies(LLVMSelectionDAG intrinsics_gen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0914c66..4e29879 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -215,6 +215,7 @@ namespace { SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); + SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); @@ -227,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -328,15 +332,12 @@ namespace { class WorkListRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: - explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + explicit WorkListRemover(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} virtual void NodeDeleted(SDNode *N, SDNode *E) { DC.removeFromWorkList(N); } - - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } }; } @@ -619,8 +620,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesWith(N, To, &DeadNodes); - + DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { @@ -650,7 +650,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. 
AddToWorkList(TLO.New.getNode()); @@ -707,9 +707,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { Trunc.getNode()->dump(&DAG); dbgs() << '\n'); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); removeFromWorkList(Load); DAG.DeleteNode(Load); AddToWorkList(Trunc.getNode()); @@ -961,8 +960,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { Result.getNode()->dump(&DAG); dbgs() << '\n'); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); removeFromWorkList(N); DAG.DeleteNode(N); AddToWorkList(Result.getNode()); @@ -1047,12 +1046,12 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) - DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + DAG.ReplaceAllUsesWith(N, RV.getNode()); else { assert(N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && "Type mismatch"); SDValue OpV = RV; - DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + DAG.ReplaceAllUsesWith(N, &OpV); } // Push the new node and any users onto the worklist @@ -1131,6 +1130,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); + case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); @@ -1143,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -1325,10 +1328,12 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. + // First add the users of this node to the work list so that they + // can be tried again once they have new operands. 
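A few hunks further down, visitAND gains a rewrite of (and (add x, c1), (lshr y, c2)) that rests on a bit-level identity: the shifted operand has its top c2 bits clear, so any bits of c1 that fall entirely under that zero region can be set freely, possibly turning an unencodable add immediate into an encodable one. A self-contained check of the identity with illustrative constants (c2 = 16):

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative values only: pretend c1 is not a legal add immediate.
  uint32_t x = 0x1234, y = 0xDEADBEEF;
  uint32_t c1 = 0x00008000;
  uint32_t highMask = 0xFFFF0000;   // bits (y >> 16) forces to zero
  uint32_t c1b = c1 | highMask;     // 0xFFFF8000, e.g. a sign-extended
                                    // 16-bit immediate on some targets
  // The masked result is unchanged, because carries only propagate upward
  // and the OR'd-in high bits are removed by the and:
  std::printf("%d\n", ((x + c1) & (y >> 16)) == ((x + c1b) & (y >> 16))); // 1
}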
+ AddUsersToWorkList(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); @@ -1640,7 +1645,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, - N1.getOperand(0)); + N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && @@ -2341,7 +2346,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) - && Level == AfterLegalizeVectorOps) { + && Level == AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); @@ -2528,7 +2533,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); // Replace uses of the EXTLOAD with the new ZEXTLOAD. - CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + if (Load->getNumValues() == 3) { + // PRE/POST_INC loads have 3 values. + SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), + NewLoad.getValue(2) }; + CombineTo(Load, To, 3, true); + } else { + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } } // Fold the AND away, taking care not to fold to the old load node if we @@ -2710,6 +2722,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDValue NewAdd = + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + N0.getOperand(0), DAG.getConstant(ADDC, VT)); + CombineTo(N0.getNode(), NewAdd); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + } + } + } + + return SDValue(); } @@ -4526,8 +4566,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); } return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType().getScalarType()); @@ -5012,6 +5054,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT PtrType = N0.getOperand(1).getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + // It's not possible to generate a constant of extended or untyped type. 
+ return SDValue(); + // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { @@ -5041,8 +5087,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); // Shift the result left, if we've swallowed a left shift. SDValue Result = Load; @@ -5225,7 +5270,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - + EVT IndexTy = N0->getOperand(1).getValueType(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), @@ -5233,7 +5278,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), TrTy, V, - DAG.getConstant(Index, MVT::i32)); + DAG.getConstant(Index, IndexTy)); } } @@ -5607,7 +5652,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (FoldedVOp.getNode()) return FoldedVOp; } - // fold (fadd c1, c2) -> (fadd c1, c2) + // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); // canonicalize constant to RHS @@ -5636,6 +5681,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // FADD -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + // Note: Commutes FADD operands. 
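The FADD combine introduced above contracts an fmul feeding an fadd into a single ISD::FMA. The fused form rounds once where the separate operations round twice, so results can differ in the last ulp; that is why the fold is gated on AllowFPOpFusion/UnsafeFPMath. A quick standalone comparison using std::fma:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.0 / 3.0, y = 3.0, z = -1.0;
  // (x*y) + z rounds the product and then the sum; fma rounds only once,
  // so the two printed values may differ in the final digits.
  std::printf("add then round twice: %.17g\n", x * y + z);
  std::printf("fused, one rounding:  %.17g\n", std::fma(x, y, z));
}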
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + } + return SDValue(); } @@ -5645,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5665,17 +5731,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold + // (fsub x, x) -> 0.0 & // (fsub x, (fadd x, y)) -> (fneg y) & // (fsub x, (fadd y, x)) -> (fneg y) if (DAG.getTarget().Options.UnsafeFPMath) { + if (N0 == N1) + return DAG.getConstantFP(0.0f, VT); + if (N1.getOpcode() == ISD::FADD) { SDValue N10 = N1->getOperand(0); SDValue N11 = N1->getOperand(1); @@ -5689,6 +5759,40 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + // FSUB -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. 
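Earlier in this hunk, visitFSUB also gains the unsafe-math fold (fsub x, x) -> 0.0. It is only an approximation: for Inf or NaN inputs the exact IEEE result is NaN, not zero, which is precisely why it sits behind UnsafeFPMath. A two-line demonstration:

#include <cstdio>
#include <limits>

int main() {
  double inf = std::numeric_limits<double>::infinity();
  // x - x is 0.0 only for finite x; Inf - Inf and NaN - NaN are NaN.
  std::printf("%g %g\n", 1.25 - 1.25, inf - inf); // 0 nan
}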
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+
+ // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+
return SDValue();
}
@@ -5720,6 +5824,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath &&
ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
@@ -5753,6 +5860,26 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5893,6 +6020,38 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
}
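The folds added next turn sint_to_fp of an i1 setcc into a floating-point select: a 1-bit signed true sign-extends to -1, so the converted value is either -1.0 or 0.0. A schematic scalar analogue (plain C++, not DAG node construction):

#include <cstdio>

// Scalar analogue of the fold: an i1 setcc result sign-extends to 0 or -1,
// so sint_to_fp of it is a two-way select between -1.0 and 0.0.
static double sintToFPOfSetCC(int x, int y) {
  return (x < y) ? -1.0 : 0.0;  // select_cc x, y, -1.0, 0.0, setlt
}

int main() { std::printf("%g\n", sintToFPOfSetCC(1, 2)); } // -1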
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -5918,6 +6077,25 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -6071,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP =
dyn_cast<ConstantFPSDNode>(N0); @@ -6185,7 +6399,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } // Replace the uses of SRL with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -6214,7 +6428,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Tmp.getNode()->dump(&DAG); dbgs() << '\n'); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); removeFromWorkList(TheXor); DAG.DeleteNode(TheXor); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), @@ -6240,7 +6454,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), @@ -6431,21 +6645,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), - &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); removeFromWorkList(Ptr.getNode()); DAG.DeleteNode(Ptr.getNode()); @@ -6559,13 +6769,10 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), - &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } // Finally, since the node is now dead, remove it from the graph. @@ -6573,8 +6780,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0), - &DeadNodes); + Result.getValue(isLoad ? 
1 : 0));
removeFromWorkList(Op);
DAG.DeleteNode(Op);
return true;
@@ -6609,7 +6815,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
if (N->use_empty()) {
removeFromWorkList(N);
@@ -6629,11 +6835,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -6955,8 +7160,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
AddToWorkList(NewLD.getNode());
AddToWorkList(NewVal.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
@@ -7013,8 +7217,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
AddToWorkList(NewLD.getNode());
AddToWorkList(NewST.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
@@ -7058,7 +7261,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Tmp;
switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP type");
- case MVT::f80: // We don't do this for these yet.
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
break;
@@ -7323,8 +7527,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
OrigElt -= NumElem;
}
+ EVT IndexTy = N->getOperand(1).getValueType();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
- InVec, DAG.getConstant(OrigElt, MVT::i32));
+ InVec, DAG.getConstant(OrigElt, IndexTy));
}
// Perform only after legalization to ensure build_vector / vector_shuffle
@@ -7472,7 +7677,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorkList(Load.getNode());
@@ -7489,6 +7694,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7496,12 +7706,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// using shuffles.
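One of the folds above replaces a BUILD_VECTOR whose operands are all undef with a single UNDEF node, and the same check is added for CONCAT_VECTORS further down. The test is just a scan over the operands; schematically, with an undef lane modelled as a null pointer:

#include <cstdio>
#include <vector>

// Undef lanes modelled as null pointers; any defined operand defeats the fold.
struct Node;
static bool allOperandsUndef(const std::vector<const Node *> &Ops) {
  for (const Node *Op : Ops)
    if (Op)
      return false;
  return true;
}

int main() {
  std::vector<const Node *> Lanes(4, nullptr); // build_vector of four undefs
  std::printf("%d\n", allOperandsUndef(Lanes)); // 1: fold to UNDEF
}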
EVT SourceType = MVT::Other; bool AllAnyExt = true; - bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. if (In.getOpcode() == ISD::UNDEF) continue; - AllUndef = false; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -7529,9 +7738,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { AllAnyExt &= AnyExt; } - if (AllUndef) - return DAG.getUNDEF(VT); - // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. @@ -7707,6 +7913,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (N->getNumOperands() == 1) return N->getOperand(0); + // Check if all of the operands are undefs. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(N->getValueType(0)); + return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 0c1ac69..683fac6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,6 +40,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -51,10 +52,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" @@ -484,7 +485,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += + TotalOffs += TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); @@ -573,7 +574,10 @@ bool FastISel::SelectCall(const User *I) { // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + // The donothing intrinsic does, well, nothing. + case Intrinsic::donothing: return true; + case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || @@ -642,7 +646,7 @@ bool FastISel::SelectCall(const User *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); - else + else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); @@ -786,13 +790,24 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. 
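The bail-out introduced just below makes fast-isel decline direct calls to recognized library functions for which the target has a custom lowering, falling back to the full SelectionDAG path instead. A toy version of the predicate (the getLibFunc/hasOptimizedCodeGen names mirror the hunk, but the lookup table here is purely illustrative):

#include <string>

// Toy stand-in for the TargetLibraryInfo queries used in the hunk.
struct LibInfoT {
  bool getLibFunc(const std::string &Name, int &F) const {
    if (Name == "memcpy") { F = 0; return true; } // illustrative table
    return false;
  }
  bool hasOptimizedCodeGen(int /*F*/) const { return true; }
};

// Decline fast-isel for a non-local direct call to a recognized library
// function that the target lowers specially.
static bool shouldSkipFastISel(const LibInfoT &LibInfo,
                               const std::string &Callee,
                               bool HasLocalLinkage) {
  int Func;
  return !HasLocalLinkage && LibInfo.getLibFunc(Callee, Func) &&
         LibInfo.hasOptimizedCodeGen(Func);
}

int main() {
  LibInfoT LI;
  return shouldSkipFastISel(LI, "memcpy", false) ? 0 : 1; // memcpy is skipped
}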
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } - // Remove dead code. However, ignore call instructions since we've flushed + // Remove dead code. However, ignore call instructions since we've flushed // the local value map and recomputed the insert point. if (!isa<CallInst>(I)) { recomputeInsertPt(); @@ -1037,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo) +FastISel::FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), @@ -1046,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo) TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()) { + TRI(*TM.getRegisterInfo()), + LibInfo(libInfo) { } FastISel::~FastISel() {} @@ -1306,6 +1323,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { @@ -1345,6 +1386,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); + const TargetRegisterClass *RC = MRI.getRegClass(Op0); + MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), ResultReg) .addReg(Op0, getKillRegState(Op0IsKill), Idx); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8dde919..3e18ea7 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -15,13 +15,13 @@ #define DEBUG_TYPE "function-lowering-info" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 1467d88..4488d27 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -48,16 +48,31 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { return N; } -/// CountOperands - The inputs to target nodes have any actual inputs first, +/// countOperands - The inputs to target nodes have any actual inputs first, /// followed by an optional chain operand, then an optional glue operand. /// Compute the number of actual operands that will go into the resulting /// MachineInstr. -unsigned InstrEmitter::CountOperands(SDNode *Node) { +/// +/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding +/// the chain and glue. These operands may be implicit on the machine instr. +static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. + + // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. + for (unsigned I = N; I; --I) { + if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) + continue; + if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) + if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + continue; + NumImpUses = N - I; + break; + } + return N; } @@ -114,8 +129,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; - if (i+II.getNumDefs() < II.getNumOperands()) - RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); + if (i+II.getNumDefs() < II.getNumOperands()) { + RC = TRI->getAllocatableClass( + TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); + } if (!UseRC) UseRC = RC; else if (RC) { @@ -196,7 +213,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; - const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI); + const TargetRegisterClass *RC = + TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -293,7 +311,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, if (II) { const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) - DstRC = TII->getRegClass(*II, IIOpNum, TRI); + DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { @@ -334,8 +352,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the -/// operand number (in the II) that we are adding. IIOpNum and II are used for -/// assertions only. +/// operand number (in the II) that we are adding. 
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, @@ -350,7 +367,11 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, const ConstantFP *CFP = F->getConstantFPValue(); MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + // Turn additional physreg operands into implicit uses on non-variadic + // instructions. This is used by call and return instructions passing + // arguments in registers. + bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { @@ -390,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), BA->getTargetFlags())); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), + TI->getOffset(), + TI->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -458,7 +483,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && - SubIdx == DefSubIdx) { + SubIdx == DefSubIdx && + TRC == MRI->getRegClass(SrcReg)) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 @@ -467,6 +493,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + MRI->clearKillFlags(SrcReg); } else { // VReg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register @@ -548,7 +575,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, // Create the new VReg in the destination class and emit a copy. 
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); + const TargetRegisterClass *DstRC = + TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -566,7 +594,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(RC); + unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -691,7 +719,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); - unsigned NodeOperands = CountOperands(Node); + unsigned NumImpUses = 0; + unsigned NodeOperands = countOperands(Node, NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -700,7 +729,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && - NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && + NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + + NumImpUses && "#operands for dag node doesn't match .td file!"); #endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index c081f38..9eddee9 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -105,12 +105,6 @@ public: /// (which do not go into the machine instrs.) static unsigned CountResults(SDNode *Node); - /// CountOperands - The inputs to target nodes have any actual inputs first, - /// followed by an optional chain operand, then flag operands. Compute - /// the number of actual operands that will go into the resulting - /// MachineInstr. - static unsigned CountOperands(SDNode *Node); - /// EmitDbgValue - Generate machine instruction for a dbg_value node. 
/// MachineInstr *EmitDbgValue(SDDbgValue *SD, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a96a997..908ebb9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -20,10 +24,6 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -70,6 +70,9 @@ private: SDValue OptimizeFloatStore(StoreSDNode *ST); + void LegalizeLoadOps(SDNode *Node); + void LegalizeStoreOps(SDNode *Node); + /// PerformInsertVectorEltInMemory - Some target cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform @@ -150,21 +153,21 @@ public: // Node replacement helpers void ReplacedNode(SDNode *N) { if (N->use_empty()) { - DAG.RemoveDeadNode(N, this); + DAG.RemoveDeadNode(N); } else { ForgetNode(N); } } void ReplaceNode(SDNode *Old, SDNode *New) { - DAG.ReplaceAllUsesWith(Old, New, this); + DAG.ReplaceAllUsesWith(Old, New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { - DAG.ReplaceAllUsesWith(Old, New, this); + DAG.ReplaceAllUsesWith(Old, New); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { - DAG.ReplaceAllUsesWith(Old, New, this); + DAG.ReplaceAllUsesWith(Old, New); ReplacedNode(Old); } }; @@ -203,7 +206,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, } SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) - : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + : SelectionDAG::DAGUpdateListener(dag), + TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag) { } @@ -424,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), @@ -432,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (VT.isFloatingPoint() && LoadedVT != VT) - Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; ChainResult = Chain; @@ -638,9 +643,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // probably means that we need to integrate dag combiner and legalizer // together. 
// We generally can't do this one for long doubles. - SDValue Tmp1 = ST->getChain(); - SDValue Tmp2 = ST->getBasePtr(); - SDValue Tmp3; + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -648,19 +652,19 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { - Tmp3 = DAG.getConstant(CFP->getValueAPF(). + SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { - Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } @@ -673,11 +677,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); @@ -688,14 +692,448 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } +void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + DebugLoc dl = Node->getDebugLoc(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + if (!ST->isTruncatingStore()) { + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + { + SDValue Value = ST->getValue(); + EVT VT = Value.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. 
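The load and store legalization paths added in the hunks that follow apply one policy repeatedly: a memory operation is kept as-is only if the target tolerates misaligned accesses for that type or the operation's alignment already meets the ABI requirement; otherwise it is expanded. Reduced to a predicate:

#include <cstdio>

// Expand unless the target allows misaligned accesses or the ABI alignment
// for the type is already met.
static bool needsExpansion(bool AllowsUnaligned, unsigned Align,
                           unsigned ABIAlign) {
  return !AllowsUnaligned && Align < ABIAlign;
}

int main() {
  std::printf("%d\n", needsExpansion(false, 2, 4)); // 1: split this access
  std::printf("%d\n", needsExpansion(true, 2, 4));  // 0: target copes
}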
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), + DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + assert(VT.isVector() && "Unknown legal promote case!"); + Value = DAG.getNode(ISD::BITCAST, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + } else { + SDValue Value = ST->getValue(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. + assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, + isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. 
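For a store width that is not a power of two, the code above splits the value into a rounded-down power-of-two piece and the remainder; the big-endian branch here puts the top RoundWidth bits at the base pointer and the low byte at offset RoundWidth/8, so each piece lands on its natural alignment. A host-independent sketch of the i24 case:

#include <cstdint>
#include <cstdio>

// Big-endian layout of the split 24-bit truncating store from the hunk:
// the top 16 bits (x >> 8) go at P, the low byte at P+2.
static void store24BE(uint8_t *P, uint32_t X) {
  uint16_t Hi = static_cast<uint16_t>(X >> 8);
  P[0] = static_cast<uint8_t>(Hi >> 8);  // bytes written explicitly to keep
  P[1] = static_cast<uint8_t>(Hi);       // the example host-independent
  P[2] = static_cast<uint8_t>(X);        // TRUNCSTORE@+2:i8 X
}

int main() {
  uint8_t Buf[3];
  store24BE(Buf, 0xABCDEF);
  std::printf("%02X %02X %02X\n", Buf[0], Buf[1], Buf[2]); // AB CD EF
}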
+ IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(TLI.isTypeLegal(StVT) && + "Do not know how to expand this store!"); + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + } +} + +void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDValue Chain = LD->getChain(); // The chain. + SDValue Ptr = LD->getBasePtr(); // The base pointer. + SDValue Value; // The value returned by the load op. + DebugLoc dl = Node->getDebugLoc(); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + EVT VT = Node->getValueType(0); + SDValue RVal = SDValue(Node, 0); + SDValue RChain = SDValue(Node, 1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, RVal, RChain); + } + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; + } + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. 
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); + RChain = Res.getValue(1); + break; + } + } + if (RChain.getNode() != Node) { + assert(RVal.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + ReplacedNode(Node); + } + return; + } + + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Value = Result; + Chain = Ch; + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. 
+ IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Chain = Ch; + } else { + bool isCustom = false; + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: { + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; + } + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? 
+ ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + if (Chain.getNode() != Node) { + assert(Value.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + } +} + /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. return; - DebugLoc dl = Node->getDebugLoc(); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal && @@ -708,9 +1146,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - bool isCustom = false; - // Figure out the correct action; the way to query this varies by opcode TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; @@ -816,9 +1251,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (SimpleFinishLegalizing) { - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(Node->getOperand(i)); + SDNode *NewNode = Node; switch (Node->getOpcode()) { default: break; case ISD::SHL: @@ -828,11 +1261,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
- if (!Ops[1].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + if (!Node->getOperand(1).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(1)); HandleSDNode Handle(SAO); LegalizeOp(SAO.getNode()); - Ops[1] = Handle.getValue(); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Handle.getValue()); } break; case ISD::SRL_PARTS: @@ -840,18 +1276,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[2].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + if (!Node->getOperand(2).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(2)); HandleSDNode Handle(SAO); LegalizeOp(SAO.getNode()); - Ops[2] = Handle.getValue(); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Node->getOperand(1), + Handle.getValue()); } break; } - SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode, this); + DAG.ReplaceAllUsesWith(Node, NewNode); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); ReplacedNode(Node); @@ -860,27 +1299,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { switch (Action) { case TargetLowering::Legal: return; - case TargetLowering::Custom: + case TargetLowering::Custom: { // FIXME: The handling for custom lowering with multiple results is // a complete mess. - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) - ResultVals.push_back(Tmp1); + ResultVals.push_back(Res); else - ResultVals.push_back(Tmp1.getValue(i)); + ResultVals.push_back(Res.getValue(i)); } - if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + if (Res.getNode() != Node || Res.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data()); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); ReplacedNode(Node); } return; } - + } // FALL THROUGH case TargetLowering::Expand: ExpandNode(Node); @@ -904,428 +1343,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::CALLSEQ_END: break; case ISD::LOAD: { - LoadSDNode *LD = cast<LoadSDNode>(Node); - Tmp1 = LD->getChain(); // Legalize the chain. - Tmp2 = LD->getBasePtr(); // Legalize the base pointer. - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType == ISD::NON_EXTLOAD) { - EVT VT = Node->getValueType(0); - Tmp3 = SDValue(Node, 0); - Tmp4 = SDValue(Node, 1); - - switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Tmp3, Tmp4); - } - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Tmp3, DAG); - if (Tmp1.getNode()) { - Tmp3 = Tmp1; - Tmp4 = Tmp1.getValue(1); - } - break; - case TargetLowering::Promote: { - // Only promote a load of vector type to another. - assert(VT.isVector() && "Cannot promote this load!"); - // Change base type to a different vector type. - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); - - Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); - Tmp4 = Tmp1.getValue(1); - break; - } - } - if (Tmp4.getNode() != Node) { - assert(Tmp3.getNode() != Node && "Load must be completely replaced"); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); - ReplacedNode(Node); - } - return; - } - - EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); - unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - - if (SrcWidth != SrcVT.getStoreSizeInBits() && - // Some targets pretend to have an i1 loading operation, and actually - // load an i8. This trick is correct for ZEXTLOAD because the top 7 - // bits are guaranteed to be zero; it helps the optimizers understand - // that these bits are zero. It is also useful for EXTLOAD, since it - // tells the optimizers that those bits are undefined. It would be - // nice to have an effective generic way of getting these benefits... - // Until such a way is found, don't insist on promoting i1 here. - (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { - // Promote to a byte-sized load if not loading an integral number of - // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - unsigned NewWidth = SrcVT.getStoreSizeInBits(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); - SDValue Ch; - - // The extra bits are guaranteed to be zero, since we stored them that - // way. A zext load from NVT thus automatically gives zext from SrcVT. - - ISD::LoadExtType NewExtType = - ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - - Ch = Result.getValue(1); // The chain. - - if (ExtType == ISD::SEXTLOAD) - // Having the top bits zero doesn't help when sign extending. - Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); - - Tmp1 = Result; - Tmp2 = Ch; - } else if (SrcWidth & (SrcWidth - 1)) { - // If not loading a power-of-2 number of bits, expand as two loads. 
- assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Load size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi, Ch; - unsigned IncrementSize; - - if (TLI.isLittleEndian()) { - // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) - // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); - - // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } else { - // Big endian - avoid unaligned loads. - // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 - // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); - - // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } - - Tmp2 = Ch; - } else { - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: - Tmp1 = SDValue(Node, 0); - Tmp2 = SDValue(Node, 1); - - if (isCustom) { - Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp3.getNode()) { - Tmp1 = Tmp3; - Tmp2 = Tmp3.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Tmp1, Tmp2); - } - } - } - break; - case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp2 = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. - assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = ValRes; - Tmp2 = Result.getValue(1); - break; - } - } - - // Since loads produce two values, make sure to remember that we legalized - // both of them. - if (Tmp2.getNode() != Node) { - assert(Tmp1.getNode() != Node && "Load must be completely replaced"); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); - ReplacedNode(Node); - } - break; + return LegalizeLoadOps(Node); } case ISD::STORE: { - StoreSDNode *ST = cast<StoreSDNode>(Node); - Tmp1 = ST->getChain(); - Tmp2 = ST->getBasePtr(); - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - - if (!ST->isTruncatingStore()) { - if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - ReplaceNode(ST, OptStore); - break; - } - - { - Tmp3 = ST->getValue(); - EVT VT = Tmp3.getValueType(); - switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned store and the target doesn't support it, - // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), - DAG, TLI, this); - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) - ReplaceNode(SDValue(Node, 0), Tmp1); - break; - case TargetLowering::Promote: { - assert(VT.isVector() && "Unknown legal promote case!"); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, - TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - break; - } - } else { - Tmp3 = ST->getValue(); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - - if (StWidth != StVT.getStoreSizeInBits()) { - // Promote to a byte-sized store with upper bits zero if not - // storing an integral number of bytes. For example, promote - // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - SDValue Result = - DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { - // If not storing a power-of-2 number of bits, expand as two stores. - assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Store size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi; - unsigned IncrementSize; - - if (TLI.isLittleEndian()) { - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) - // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - RoundVT, - isVolatile, isNonTemporal, Alignment); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Tmp3.getValueType()))); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - } else { - // Big endian - avoid unaligned stores. - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X - // Store the top RoundWidth bits. - Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Tmp3.getValueType()))); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); - - // Store the remaining ExtraWidth bits. 
- IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - } - - // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - ReplaceNode(SDValue(Node, 0), Result); - } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned store and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); - } - break; - case TargetLowering::Custom: - ReplaceNode(SDValue(Node, 0), - TLI.LowerOperation(SDValue(Node, 0), DAG)); - break; - case TargetLowering::Expand: - assert(!StVT.isVector() && - "Vector Stores are handled in LegalizeVectorOps"); - - // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); - Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - } - break; + return LegalizeStoreOps(Node); } } } @@ -1795,11 +1816,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, if (isTailCall) InChain = TCChain; - std::pair<SDValue, SDValue> CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). 
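The same mechanical rewrite recurs at every libcall-lowering site in this import: the long positional argument list of TLI.LowerCallTo is first packed into a TargetLowering::CallLoweringInfo, and LowerCallTo is then called with that single struct. A minimal sketch of the pattern, reusing the names in scope in the hunk above (InChain, RetTy, isSigned, LC, Callee, Args, dl); the /*...=*/ annotations follow the surrounding comments and are otherwise assumptions about this revision's parameter order:

    // Bundle the call properties once; new properties can later be added to
    // CallLoweringInfo without touching every caller.
    TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned,
                                         /*isVarArg=*/false, /*isInReg=*/false,
                                         /*NumFixedArgs=*/0,
                                         TLI.getLibcallCallingConv(LC),
                                         /*isTailCall=*/false,
                                         /*doesNotReturn=*/false,
                                         /*isReturnValueUsed=*/true,
                                         Callee, Args, DAG, dl);
    // CallInfo.first is the call's result value, CallInfo.second the chain.
    std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);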
@@ -1828,11 +1851,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - std::pair<SDValue,SDValue> CallInfo = - TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; } @@ -1860,11 +1885,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - std::pair<SDValue, SDValue> CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; } @@ -1919,9 +1945,11 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, return TLI.getLibcallName(LC) != 0; } -/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// useDivRem - Only issue divrem libcall if both quotient and remainder are /// needed. -static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { +static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { + // The other use might have been replaced with a divrem already. + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; unsigned OtherOpcode = 0; if (isSigned) OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; @@ -1935,7 +1963,7 @@ static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { SDNode *User = *UI; if (User == Node) continue; - if (User->getOpcode() == OtherOpcode && + if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && User->getOperand(0) == Op0 && User->getOperand(1) == Op1) return true; @@ -1992,11 +2020,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, TLI.getPointerTy()); DebugLoc dl = Node->getDebugLoc(); - std::pair<SDValue, SDValue> CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, @@ -2570,14 +2599,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. 
TargetLowering::ArgListTy Args; - std::pair<SDValue, SDValue> CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + TargetLowering:: + CallLoweringInfo CLI(Node->getOperand(0), + Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + Results.push_back(CallResult.second); break; } @@ -2647,13 +2679,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; - std::pair<SDValue, SDValue> CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + TargetLowering:: + CallLoweringInfo CLI(Node->getOperand(0), + Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + Results.push_back(CallResult.second); break; } @@ -3059,7 +3094,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp2, DAG.getConstant(1, VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; } @@ -3074,7 +3109,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && - UseDivRem(Node, isSigned, false))) { + useDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y @@ -3102,7 +3137,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDVTList VTs = DAG.getVTList(VT, VT); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && - UseDivRem(Node, isSigned, true))) + useDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 95ddb1e..e8e968a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -588,18 +588,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { unsigned NumElts = InVT.getVectorNumElements(); assert(NumElts == NVT.getVectorNumElements() && "Dst and Src must have the same number of elements"); - EVT EltVT = InVT.getScalarType(); assert(isPowerOf2_32(NumElts) && "Promoted vector type must be a power of two"); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2); + SDValue EOp1, EOp2; + GetSplitVector(InOp, EOp1, EOp2); + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), NumElts/2); - - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, - DAG.getIntPtrConstant(0)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, 
HalfVT, InOp, - DAG.getIntPtrConstant(NumElts/2)); EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); @@ -2273,9 +2269,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETNE); + RHS, DAG.getConstant(0, VT), ISD::SETNE); SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); SDValue Overflow; Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); @@ -2296,8 +2292,8 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, PtrVT), Temp, - MachinePointerInfo(), false, false, 0); + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2319,16 +2315,17 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Args.push_back(Entry); SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); - std::pair<SDValue, SDValue> CallInfo = - TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Func, Args, DAG, dl); + TargetLowering:: + CallLoweringInfo CLI(Chain, RetTy, true, false, false, false, + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 439aa4d..39337ff 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -628,7 +628,8 @@ namespace { public: explicit NodeUpdateListener(DAGTypeLegalizer &dtl, SmallSetVector<SDNode*, 16> &nta) - : DTL(dtl), NodesToAnalyze(nta) {} + : SelectionDAG::DAGUpdateListener(dtl.getDAG()), + DTL(dtl), NodesToAnalyze(nta) {} virtual void NodeDeleted(SDNode *N, SDNode *E) { assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && @@ -680,7 +681,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { SmallSetVector<SDNode*, 16> NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); do { - DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + DAG.ReplaceAllUsesOfValueWith(From, To); // The old node may still be present in a map like ExpandedIntegers or // PromotedIntegers. Inform maps about the replacement. 
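The listener hunk above inverts the registration direction: a SelectionDAG::DAGUpdateListener now attaches itself to the DAG in its base-class constructor (note the new SelectionDAG::DAGUpdateListener(dtl.getDAG()) initializer), so ReplaceAllUsesOfValueWith no longer takes a listener argument. A sketch of a minimal listener under the new model; CountingDeletes is a hypothetical name, not part of this patch, and the assumption is that the base-class destructor unregisters the listener again:

    namespace {
    // Counts the nodes the DAG deletes while this object is in scope.
    // The base-class constructor registers the listener with the DAG;
    // the destructor is assumed to unregister it.
    class CountingDeletes : public SelectionDAG::DAGUpdateListener {
      unsigned &NumDeleted;
    public:
      CountingDeletes(SelectionDAG &DAG, unsigned &Count)
        : SelectionDAG::DAGUpdateListener(DAG), NumDeleted(Count) {}
      virtual void NodeDeleted(SDNode *N, SDNode *E) { ++NumDeleted; }
      // NodeUpdated keeps the default no-op implementation.
    };
    }

    // While a CountingDeletes is alive, a plain
    //   DAG.ReplaceAllUsesOfValueWith(From, To);
    // notifies it; no listener pointer is threaded through the call site.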
@@ -709,7 +710,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { SDValue NewVal(M, i); if (M->getNodeId() == Processed) RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); // OldVal may be a target of the ReplacedValues map which was marked // NewNode to force reanalysis because it was updated. Ensure that // anything that ReplacedValues mapped to OldVal will now be mapped @@ -950,7 +951,7 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) if (i != ResNo) ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); - return SDValue(N, ResNo); + return SDValue(N->getOperand(ResNo)); } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -1054,12 +1055,14 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - std::pair<SDValue,SDValue> CallInfo = - TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); + return CallInfo.first; } @@ -1086,11 +1089,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - std::pair<SDValue, SDValue> CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e866445..94fc976 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -135,6 +135,8 @@ public: ReplacedValues[SDValue(Old, i)] = SDValue(New, i); } + SelectionDAG &getDAG() const { return DAG; } + private: SDNode *AnalyzeNewNode(SDNode *N); void AnalyzeNewValue(SDValue &Val); @@ -151,7 +153,7 @@ private: /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES /// node with the corresponding input operand, except for the result 'ResNo', - /// which is returned. + /// for which the corresponding input operand is returned. 
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
@@ -509,10 +511,12 @@ private:
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
@@ -553,6 +557,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned OpNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a8ff7c6..06f6bd6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -168,6 +168,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue OldVec = N->getOperand(0);
unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
DebugLoc dl = N->getDebugLoc();
// Convert to a vector of the expanded element type, for example
@@ -175,6 +176,15 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9fe4480..704f99b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -71,6 +71,9 @@ class VectorLegalizer {
// operands to a different type and bitcasting the result back to the
// original type.
SDValue PromoteVectorOp(SDValue Op);
+ // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
+ // operand to the next size up.
+ SDValue PromoteVectorOpINT_TO_FP(SDValue Op); public: bool Run(); @@ -231,9 +234,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; + switch (Op.getOpcode()) { + default: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + Result = PromoteVectorOpINT_TO_FP(Op); + Changed = true; + break; + } break; case TargetLowering::Legal: break; case TargetLowering::Custom: { @@ -293,6 +306,44 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } +SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { + // INT_TO_FP operations may require the input operand be promoted even + // when the type is otherwise legal. + EVT VT = Op.getOperand(0).getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + + // Normal getTypeToPromoteTo() doesn't work here, as that will promote + // by widening the vector w/ the same element width and twice the number + // of elements. We want the other way around, the same number of elements, + // each twice the width. + // + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); + assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : + ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], + Operands.size()); +} + SDValue VectorLegalizer::ExpandLoad(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5f23f01..4709202 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -48,7 +48,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; - case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; @@ -115,6 +115,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SRL: R = ScalarizeVecRes_BinOp(N); break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; } // If R is null, the sub-method took care of registering the result. 
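PromoteVectorOpINT_TO_FP above promotes by doubling the element width while keeping the element count, the opposite of what getTypeToPromoteTo would produce. A worked sketch of the transform for an assumed target on which sint_to_fp from v8i16 is illegal but from v8i32 is legal:

    // Input (hypothetical): Op = (v8f32 sint_to_fp (v8i16 X))
    DebugLoc dl = Op.getDebugLoc();
    MVT NVT = MVT::getVectorVT(MVT::i32, 8);   // still 8 elements, i16 -> i32
    unsigned ExtOpc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND
                                                        : ISD::SIGN_EXTEND;
    // Extending with the conversion's own signedness keeps each element's
    // numeric value unchanged, so the converted result is identical.
    SDValue Ext = DAG.getNode(ExtOpc, dl, NVT, Op.getOperand(0));
    SDValue Res = DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Ext);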
@@ -129,6 +132,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + Op0.getValueType(), Op0, Op1, Op2); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -141,6 +152,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { NewVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. + if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); @@ -436,7 +457,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; - + // See if the target wants to custom expand this node. if (CustomLowerNode(N, N->getValueType(ResNo), true)) return; @@ -448,7 +469,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to split the result of this operator!"); + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::VSELECT: @@ -529,6 +551,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FMA: + SplitVecRes_TernaryOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -548,6 +573,22 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); } +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. 
The input may be either a vector or a @@ -977,7 +1018,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to split this operator's operand!"); + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; @@ -1203,15 +1246,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { DebugLoc DL = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); - + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); - + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); -} +} @@ -1755,8 +1798,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (InputWidened) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -1816,7 +1859,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, DAG.getIntPtrConstant(0)); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); + SatOp, CvtCode); } } @@ -1832,7 +1875,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getIntPtrConstant(i)); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); + SatOp, CvtCode); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -1936,7 +1979,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { Cond1 = GetWidenedVector(Cond1); if (Cond1.getValueType() != CondWidenVT) - Cond1 = ModifyToType(Cond1, CondWidenVT); + Cond1 = ModifyToType(Cond1, CondWidenVT); } SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -2202,7 +2245,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getIntPtrConstant(0)); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2371,10 +2414,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), - isVolatile, - isNonTemporal, isInvariant, - MinAlign(Align, Increment)); + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -2563,7 +2604,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); - } while (StWidth != 0 && StWidth >= NewVTWidth); + } while 
(StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index ff0136e..c3794d5 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -50,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
const TargetMachine &tm = (*IS->MF).getTarget();
ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
- // This hard requirment could be relaxed, but for now
+ // This hard requirement could be relaxed, but for now
// do not let it proceed.
assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
@@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
- if (Packet.size() >= InstrItins->IssueWidth) {
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
@@ -353,7 +353,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
}
/// Estimates change in reg pressure from this SU.
-/// It is acheived by trivial tracking of defined
+/// It is achieved by trivial tracking of defined
/// and used vregs in dependent instructions.
/// The RawPressure flag makes this function ignore
/// existing reg file sizes, and report raw def/use
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 24da432..b7ce48a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -441,19 +441,14 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
- LRegs.push_back(Reg);
- Added = true;
- }
- }
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI)) {
+ LRegs.push_back(*AI);
Added = true;
}
}
+ }
return Added;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 2cb5d37..bf0a437 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -266,7 +266,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI,
- unsigned &RegClass, unsigned &Cost) {
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
EVT VT = RegDefPos.GetValue();
// Special handling for untyped values.
These values can only come from @@ -285,7 +286,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Idx = RegDefPos.GetIdx(); const MCInstrDesc Desc = TII->get(Opcode); - const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a // better way to determine it. @@ -852,7 +853,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } /// After backtracking, the hazard checker needs to be restored to a state -/// corresponding the the current cycle. +/// corresponding the current cycle. void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { HazardRec->Reset(); @@ -1181,7 +1182,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1920,7 +1921,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { unsigned RCId, Cost; - GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; @@ -2034,7 +2035,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { continue; unsigned RCId, Cost; - GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); RegPressure[RCId] += Cost; break; } @@ -2049,7 +2050,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { if (SkipRegDefs > 0) continue; unsigned RCId, Cost; - GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. @@ -2330,22 +2331,21 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // and latency. if (!checkPref || (left->SchedulingPref == Sched::ILP || right->SchedulingPref == Sched::ILP)) { - if (DisableSchedCycles) { + // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer + // is enabled, grouping instructions by cycle, then its height is already + // covered so only its depth matters. We also reach this point if both stall + // but have the same height. + if (!SPQ->getHazardRec()->isEnabled()) { if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; } - else { - // If neither instruction stalls (!LStall && !RStall) then - // its height is already covered so only its depth matters. We also reach - // this if both stall but have the same height. - int LDepth = left->getDepth() - LPenalty; - int RDepth = right->getDepth() - RPenalty; - if (LDepth != RDepth) { - DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum - << ") depth " << LDepth << " vs SU (" << right->NodeNum - << ") depth " << RDepth << "\n"); - return LDepth < RDepth ? 
1 : -1; - } + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; + if (LDepth != RDepth) { + DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); + return LDepth < RDepth ? 1 : -1; } if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; @@ -2363,7 +2363,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { #ifndef NDEBUG - const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; #endif DEBUG(dbgs() << " SU (" << left->NodeNum << ") " << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 75940ec..84e41fc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -61,6 +61,7 @@ namespace llvm { if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; @@ -98,12 +99,6 @@ namespace llvm { /// virtual void computeLatency(SUnit *SU); - /// computeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void computeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const { } - virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 92671d1..f4fe892 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -14,16 +14,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -71,7 +71,9 @@ static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { } } -SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} +// Default null implementations of the callbacks. +void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} //===----------------------------------------------------------------------===// // ConstantFPSDNode Class @@ -217,6 +219,22 @@ bool ISD::isScalarToVector(const SDNode *N) { return true; } +/// allOperandsUndef - Return true if the node has at least one operand +/// and all operands of the specified node are ISD::UNDEF. +bool ISD::allOperandsUndef(const SDNode *N) { + // Return false if the node has no operands. 
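
Aside on the allOperandsUndef helper introduced in this hunk: it deliberately rejects the vacuous case of a node with zero operands, as the in-code comment notes. A minimal standalone model of the predicate — Node and Opcode here are toy stand-ins, not LLVM's SDNode/ISD types:

#include <algorithm>
#include <cassert>
#include <vector>

enum Opcode { UNDEF, ADD, CONSTANT };

struct Node { std::vector<Opcode> Operands; };

// Vacuous truth is deliberately rejected: a node with no operands is
// not considered "all undef", matching the comment in the patch.
bool allOperandsUndef(const Node &N) {
  if (N.Operands.empty())
    return false;
  return std::all_of(N.Operands.begin(), N.Operands.end(),
                     [](Opcode Op) { return Op == UNDEF; });
}

int main() {
  assert(!allOperandsUndef(Node{{}}));             // no operands -> false
  assert( allOperandsUndef(Node{{UNDEF, UNDEF}})); // genuinely all undef
  assert(!allOperandsUndef(Node{{UNDEF, ADD}}));
}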
+ // This is "logically inconsistent" with the definition of "all" but + // is probably the desired behavior. + if (N->getNumOperands() == 0) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + + return true; +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -385,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -420,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); break; } case ISD::STORE: { const StoreSDNode *ST = cast<StoreSDNode>(N); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -449,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { const AtomicSDNode *AT = cast<AtomicSDNode>(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); break; } case ISD::VECTOR_SHUFFLE: { @@ -465,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } } // end switch (N->getOpcode()) + + // Target specific memory nodes could also have address spaces to check. + if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -544,16 +582,15 @@ void SelectionDAG::RemoveDeadNodes() { /// RemoveDeadNodes - This method deletes the unreachable nodes in the /// given list, and any nodes that become unreachable as a result. -void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) { // Process the worklist, deleting the nodes and adding their uses to the // worklist. while (!DeadNodes.empty()) { SDNode *N = DeadNodes.pop_back_val(); - if (UpdateListener) - UpdateListener->NodeDeleted(N, 0); + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, 0); // Take the node out of the appropriate CSE map. 
RemoveNodeFromCSEMaps(N); @@ -574,7 +611,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, } } -void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ +void SelectionDAG::RemoveDeadNode(SDNode *N){ SmallVector<SDNode*, 16> DeadNodes(1, N); // Create a dummy node that adds a reference to the root node, preventing @@ -582,7 +619,7 @@ void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ // dead node.) HandleSDNode Dummy(getRoot()); - RemoveDeadNodes(DeadNodes, UpdateListener); + RemoveDeadNodes(DeadNodes); } void SelectionDAG::DeleteNode(SDNode *N) { @@ -684,8 +721,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { /// node. This transfer can potentially trigger recursive merging. /// void -SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, - DAGUpdateListener *UpdateListener) { +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // For node types that aren't CSE'd, just act as if no identical node // already exists. if (!doNotCSE(N)) { @@ -694,20 +730,19 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, // If there was already an existing matching node, use ReplaceAllUsesWith // to replace the dead one with the existing one. This can cause // recursive merging of other unrelated nodes down the line. - ReplaceAllUsesWith(N, Existing, UpdateListener); + ReplaceAllUsesWith(N, Existing); - // N is now dead. Inform the listener if it exists and delete it. - if (UpdateListener) - UpdateListener->NodeDeleted(N, Existing); + // N is now dead. Inform the listeners and delete it. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, Existing); DeleteNodeNotInCSEMaps(N); return; } } - // If the node doesn't already exist, we updated it. Inform a listener if - // it exists. - if (UpdateListener) - UpdateListener->NodeUpdated(N); + // If the node doesn't already exist, we updated it. Inform listeners. 
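
The surrounding hunks replace the optional UpdateListener parameter with a DAG-owned list that every registered listener hears from. A self-contained sketch of that shape, assuming — as the RAUWUpdateListener constructor later in this diff suggests — that registration happens in the base-class constructor and unregistration in the destructor; DAG and Listener are toy types, not the LLVM classes:

#include <cstdio>

struct DAG;

struct Listener {
  DAG &G;
  Listener *Next; // intrusive link to the previously registered listener
  explicit Listener(DAG &g);
  ~Listener();
  virtual void NodeDeleted(int Id) {} // default no-op, as in the patch
};

struct DAG {
  Listener *Listeners = nullptr;
  void notifyDeleted(int Id) {
    for (Listener *L = Listeners; L; L = L->Next)
      L->NodeDeleted(Id); // every live listener is informed
  }
};

Listener::Listener(DAG &g) : G(g), Next(g.Listeners) { g.Listeners = this; }
Listener::~Listener() { G.Listeners = Next; } // strict LIFO unregistration

struct Printer : Listener {
  using Listener::Listener;
  void NodeDeleted(int Id) override { std::printf("deleted %d\n", Id); }
};

int main() {
  DAG G;
  Printer P(G);        // registered for its lifetime
  G.notifyDeleted(42); // prints "deleted 42"
}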
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeUpdated(N); } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -855,7 +890,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), Ordering(0) { + Root(getEntryNode()), Ordering(0), UpdateListeners(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); @@ -867,6 +902,7 @@ void SelectionDAG::init(MachineFunction &mf) { } SelectionDAG::~SelectionDAG() { + assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); delete Ordering; delete DbgInfo; @@ -1084,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1183,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); @@ -1949,6 +2004,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero |= (~InMask); + KnownOne &= (~KnownZero); return; } case ISD::FGETSIGN: @@ -2246,8 +2302,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } // Handle LOADX separately here. EXTLOAD case will fallthrough. - if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(Op); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { unsigned ExtType = LD->getExtensionType(); switch (ExtType) { default: break; @@ -2428,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. 
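
The new FCEIL/FTRUNC/FFLOOR folds above map each opcode to an APFloat rounding mode. The same mode-to-operation correspondence can be shown standalone with the C math library instead of APFloat:

#include <cmath>
#include <cstdio>

int main() {
  double V = -2.5;
  std::printf("ceil  (rmTowardPositive): %g\n", std::ceil(V));  // -2
  std::printf("trunc (rmTowardZero):     %g\n", std::trunc(V)); // -2
  std::printf("floor (rmTowardNegative): %g\n", std::floor(V)); // -3
}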
@@ -2675,6 +2748,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1 == N2) return N1; break; case ISD::CONCAT_VECTORS: + // Concat of UNDEFs is UNDEF. + if (N1.getOpcode() == ISD::UNDEF && + N2.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. if (N1.getOpcode() == ISD::BUILD_VECTOR && @@ -3708,8 +3786,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc - std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, TLI.getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, @@ -3717,6 +3795,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); + std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } @@ -3761,8 +3841,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc - std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, TLI.getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, @@ -3770,6 +3850,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); + std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } @@ -3822,8 +3904,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in DebugLoc - std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, TLI.getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, @@ -3831,6 +3913,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); + std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } @@ -3874,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -3946,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4002,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue 
Ops[] = {Chain, Ptr}; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4079,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); @@ -4198,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4287,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4354,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4378,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4654,13 +4746,7 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) continue; - bool NoMatch = false; - for (unsigned i = 2; i != NumVTs; ++i) - if (VTs[i] != I->VTs[i]) { - NoMatch = true; - break; - } - if (!NoMatch) + if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) return *I; } @@ -5237,11 +5323,7 @@ namespace { /// pointed to by a use iterator is deleted, increment the use iterator /// so that it doesn't dangle. /// -/// This class also manages a "downlink" DAGUpdateListener, to forward -/// messages to ReplaceAllUsesWith's callers. -/// class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { - SelectionDAG::DAGUpdateListener *DownLink; SDNode::use_iterator &UI; SDNode::use_iterator &UE; @@ -5249,21 +5331,13 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { // Increment the iterator as needed. while (UI != UE && N == *UI) ++UI; - - // Then forward the message. - if (DownLink) DownLink->NodeDeleted(N, E); - } - - virtual void NodeUpdated(SDNode *N) { - // Just forward the message. 
- if (DownLink) DownLink->NodeUpdated(N); } public: - RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl, + RAUWUpdateListener(SelectionDAG &d, SDNode::use_iterator &ui, SDNode::use_iterator &ue) - : DownLink(dl), UI(ui), UE(ue) {} + : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} }; } @@ -5273,8 +5347,7 @@ public: /// /// This version assumes From has a single result value. /// -void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDNode *From = FromN.getNode(); assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && "Cannot replace with this method!"); @@ -5288,7 +5361,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // is replaced by To, we don't want to replace of all its users with To // too. See PR3018 for more info. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5307,7 +5380,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5321,8 +5394,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, /// This version assumes that for each value of From, there is a /// corresponding value in To in the same position with the same type. /// -void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { #ifndef NDEBUG for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) assert((!From->hasAnyUseOfValue(i) || @@ -5337,7 +5409,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5356,7 +5428,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5369,16 +5441,14 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, /// /// This version can replace From with any result values. To must match the /// number and types of values returned by From. -void SelectionDAG::ReplaceAllUsesWith(SDNode *From, - const SDValue *To, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { if (From->getNumValues() == 1) // Handle the simple case efficiently. - return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + return ReplaceAllUsesWith(SDValue(From, 0), To[0]); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. 
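
For readers outside the codebase, the ReplaceAllUsesWith family being re-plumbed here performs graph-wide edge redirection. A toy model — Node stands in for SDNode, and CSE re-hashing plus iterator-invalidation handling are omitted:

#include <cassert>
#include <vector>

struct Node { std::vector<Node*> Operands; };

void replaceAllUsesWith(std::vector<Node*> &AllNodes, Node *From, Node *To) {
  assert(From != To && "Cannot replace uses with self");
  for (Node *N : AllNodes)
    for (Node *&Op : N->Operands)
      if (Op == From)
        Op = To; // redirect the use; the real code re-adds N to CSE maps
}

int main() {
  Node A, B, C;
  C.Operands = {&A, &A};
  std::vector<Node*> All = {&A, &B, &C};
  replaceAllUsesWith(All, &A, &B);
  assert(C.Operands[0] == &B && C.Operands[1] == &B); // A is now dead
}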
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5398,7 +5468,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5409,14 +5479,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The Deleted /// vector is handled the same way as for ReplaceAllUsesWith. -void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, - DAGUpdateListener *UpdateListener){ +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ // Handle the really simple, really trivial case efficiently. if (From == To) return; // Handle the simple, trivial, case efficiently. if (From.getNode()->getNumValues() == 1) { - ReplaceAllUsesWith(From, To, UpdateListener); + ReplaceAllUsesWith(From, To); return; } @@ -5424,7 +5493,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From.getNode()->use_begin(), UE = From.getNode()->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; bool UserRemovedFromCSEMaps = false; @@ -5460,7 +5529,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5489,11 +5558,10 @@ namespace { /// handled the same way as for ReplaceAllUsesWith. void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, - unsigned Num, - DAGUpdateListener *UpdateListener){ + unsigned Num){ // Handle the simple, trivial case efficiently. if (Num == 1) - return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + return ReplaceAllUsesOfValueWith(*From, *To); // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the @@ -5538,7 +5606,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User); } } @@ -5579,7 +5647,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { } } - // Visit all the nodes. As we iterate, moves nodes into sorted order, + // Visit all the nodes. As we iterate, move nodes into sorted order, // such that by the time the end is reached all nodes will be sorted. 
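
AssignTopologicalOrder's strategy of moving nodes into sorted order as it visits them is equivalent to the textbook worklist form. A standalone Kahn's-algorithm sketch over a made-up four-node DAG:

#include <cstdio>
#include <queue>
#include <vector>

int main() {
  // Toy DAG: Users[N] lists the nodes that consume N's value.
  std::vector<std::vector<int> > Users = {{2}, {2}, {3}, {}};
  std::vector<int> OperandCount = {0, 0, 2, 1};

  std::queue<int> Ready;
  for (int N = 0; N != 4; ++N)
    if (OperandCount[N] == 0)
      Ready.push(N); // sources first, like the entry token

  while (!Ready.empty()) {
    int N = Ready.front(); Ready.pop();
    std::printf("emit node %d\n", N); // assign the next topological id
    for (int U : Users[N])
      if (--OperandCount[U] == 0)
        Ready.push(U); // all operands emitted; U is now ready
  }
}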
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { SDNode *N = I; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f1e879b..ba5bd79 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" @@ -42,7 +43,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -51,6 +51,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -843,7 +844,7 @@ void SelectionDAGBuilder::clear() { } /// clearDanglingDebugInfo - Clear the dangling debug information -/// map. This function is seperated from the clear so that debug +/// map. This function is separated from the clear so that debug /// information that is dangling in a basic block can be properly /// resolved in a different basic block. This allows the /// SelectionDAG to resolve dangling debug information attached @@ -941,7 +942,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; + case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/Instruction.def" } @@ -1578,17 +1579,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { - assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + assert(CB.CC == ISD::SETCC_INVALID && + "Condition is undefined for to-the-range belonging check."); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - - if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), - ISD::SETLE); + ISD::SETULE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT)); @@ -1826,9 +1828,13 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; const Value *Callee(I.getCalledValue()); + const Function *Fn = dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) visitInlineAsm(&I); - else + else if (Fn && Fn->isIntrinsic()) { + assert(Fn->getIntrinsicID() == Intrinsic::donothing); + // Ignore invokes to @llvm.donothing: jump directly to the next BB. 
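
The switch-lowering hunks in this file replace signed comparisons (SETLE, sext) with unsigned ones (SETULE, zext); the subtract-then-compare emission in visitSwitchCase relies on the standard unsigned range-check identity. Demonstrated standalone:

#include <cassert>
#include <cstdint>

// X is in [Low, High] iff (X - Low) <=u (High - Low): when X < Low the
// subtraction wraps to a huge unsigned value and the compare fails.
bool inRange(std::uint32_t X, std::uint32_t Low, std::uint32_t High) {
  return X - Low <= High - Low;
}

int main() {
  assert( inRange(5, 3, 9));
  assert(!inRange(2, 3, 9)); // 2 - 3 wraps around
  assert( inRange(0xFFFFFFFFu, 0xFFFFFFF0u, 0xFFFFFFFFu));
}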
+ } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it @@ -1901,8 +1907,6 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, const Value* SV, MachineBasicBlock *Default, MachineBasicBlock *SwitchBB) { - Case& BackCase = *(CR.Range.second-1); - // Size is the number of Cases represented by this range. size_t Size = CR.Range.second - CR.Range.first; if (Size > 3) @@ -1970,11 +1974,28 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } } + // Order cases by weight so the most likely case will be checked first. + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI) { + for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { + uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), + I->BB->getBasicBlock()); + for (CaseItr J = CR.Range.first; J < I; ++J) { + uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), + J->BB->getBasicBlock()); + if (IWeight > JWeight) + std::swap(*I, *J); + } + } + } // Rearrange the case blocks so that the last one falls through if possible. - if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + Case &BackCase = *(CR.Range.second-1); + if (Size > 1 && + NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. - for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { + // We start at the bottom as it's the case with the least weight. + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ if (I->BB == NextBlock) { std::swap(*I, BackCase); break; @@ -2006,7 +2027,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETLE; + CC = ISD::SETCC_INVALID; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2031,14 +2052,14 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !TLI.getTargetMachine().Options.DisableJumpTables && + return TLI.supportJumpTables() && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); + APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2104,7 +2125,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); const APInt &High = cast<ConstantInt>(I->High)->getValue(); - if (Low.sle(TEI) && TEI.sle(High)) { + if (Low.ule(TEI) && TEI.ule(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2261,7 +2282,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. 
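
handleSmallSwitchRange now orders cases so the most likely one is tested first. The selection-style loop in the patch can be modeled as a stable sort by descending edge weight; the Case type and weights below are made up for illustration:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Case { int Value; unsigned Weight; };

int main() {
  std::vector<Case> Cases = {{10, 4}, {20, 90}, {30, 6}};
  std::stable_sort(Cases.begin(), Cases.end(),
                   [](const Case &A, const Case &B) {
                     return A.Weight > B.Weight; // hottest case first
                   });
  for (const Case &C : Cases)
    std::printf("compare against %d (weight %u)\n", C.Value, C.Weight);
}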
- CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2333,7 +2354,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. - if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { + if (maxValue.ult(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -2407,57 +2428,46 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - size_t numCmps = 0; + + /// Use a shorter form of declaration, and also + /// show the we want to use CRSBuilder as Clusterifier. + typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; + + Clusterifier TheClusterifier; - BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - - Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), - SMBB, ExtraWeight)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); - J != Cases.end(); ) { - const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); - const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - J = Cases.erase(J); - - if (BranchProbabilityInfo *BPI = FuncInfo.BPI) { - uint32_t CurWeight = currentBB->getBasicBlock() ? - BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16; - uint32_t NextWeight = nextBB->getBasicBlock() ? - BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16; - - BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(), - CurWeight + NextWeight); - } - } else { - I = J++; - } + TheClusterifier.add(i.getCaseValueEx(), SMBB); + } + + TheClusterifier.optimize(); + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + size_t numCmps = 0; + for (Clusterifier::RangeIterator i = TheClusterifier.begin(), + e = TheClusterifier.end(); i != e; ++i, ++numCmps) { + Clusterifier::Cluster &C = *i; + unsigned W = 0; + if (BPI) { + W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock()); + if (!W) + W = 16; + W *= C.first.Weight; + BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W); } - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) - // A range counts double, since it requires two compares. - ++numCmps; + // FIXME: Currently work with ConstantInt based numbers. + // Changing it to APInt based is a pretty heavy for this commit. 
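
Clusterify now delegates range-building to IntegersSubsetMapping, but the underlying idea is unchanged: adjacent case values with the same destination merge into one range, and a range still counts as two compares. A standalone sketch with plain ints in place of APInt:

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

struct Cluster { int Low, High, Dest; };

int main() {
  // (value, destination) pairs, already sorted by case value.
  std::vector<std::pair<int, int> > Cases =
      {{1, 0}, {2, 0}, {3, 0}, {7, 1}, {8, 0}};

  std::vector<Cluster> Clusters;
  for (std::size_t i = 0; i != Cases.size(); ++i) {
    int V = Cases[i].first, D = Cases[i].second;
    if (!Clusters.empty() && Clusters.back().Dest == D &&
        Clusters.back().High + 1 == V)
      Clusters.back().High = V; // extend the current run
    else
      Clusters.push_back(Cluster{V, V, D});
  }

  unsigned NumCmps = 0;
  for (std::size_t i = 0; i != Clusters.size(); ++i) {
    NumCmps += (Clusters[i].Low == Clusters[i].High) ? 1 : 2;
    std::printf("[%d, %d] -> BB#%d\n",
                Clusters[i].Low, Clusters[i].High, Clusters[i].Dest);
  }
  std::printf("numCmps = %u\n", NumCmps); // a range counts double
}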
+ Cases.push_back(Case(C.first.getLow().toConstantInt(), + C.first.getHigh().toConstantInt(), C.second, W)); + + if (C.first.getLow() != C.first.getHigh()) + // A range counts double, since it requires two compares. + ++numCmps; } return numCmps; @@ -2804,7 +2814,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { } // Utility for visitShuffleVector - Return true if every element in Mask, -// begining from position Pos and ending in Pos+Size, falls within the +// beginning from position Pos and ending in Pos+Size, falls within the // specified sequential range [L, L+Pos). or is undef. static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, unsigned Pos, unsigned Size, int Low) { @@ -4914,6 +4924,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fabs: + setValue(&I, DAG.getNode(ISD::FABS, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::floor: + setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -4921,6 +4941,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return 0; + case Intrinsic::fmuladd: { + EVT VT = TLI.getValueType(I.getType()); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isOperationLegal(ISD::FMA, VT) && + TLI.isFMAFasterThanMulAndAdd(VT)){ + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + } else { + SDValue Mul = DAG.getNode(ISD::FMUL, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + SDValue Add = DAG.getNode(ISD::FADD, dl, + getValue(I.getArgOperand(0)).getValueType(), + Mul, + getValue(I.getArgOperand(2))); + setValue(&I, Add); + } + return 0; + } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -5077,16 +5120,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } TargetLowering::ArgListTy Args; - std::pair<SDValue, SDValue> Result = - TLI.LowerCallTo(getRoot(), I.getType(), + TargetLowering:: + CallLoweringInfo CLI(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } + case Intrinsic::debugtrap: { + DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot())); + return 0; + } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: @@ -5139,6 +5187,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::lifetime_end: // Discard region information. return 0; + case Intrinsic::donothing: + // ignore + return 0; } } @@ -5157,14 +5208,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. 
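
The fmuladd lowering above chooses between a single FMA node and an FMUL+FADD pair. The observable difference is one rounding step versus two, which the C library's fma shows directly; the operand values below are chosen only to make the rounding gap visible:

#include <cmath>
#include <cstdio>

int main() {
  double A = 1e16;
  double B = std::nextafter(1.0, 2.0); // 1 + 2^-52
  double C = -1e16;
  double Fused = std::fma(A, B, C); // multiply and add round once
  double Split = A * B + C;         // the product rounds first
  std::printf("fused=%.17g split=%.17g\n", Fused, Split);
  // fused keeps the low product bits (~2.22), split rounds them to 2.
}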
SmallVector<ISD::OutputArg, 4> Outs; - SmallVector<uint64_t, 4> Offsets; GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - Outs, TLI, &Offsets); + Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -5247,16 +5297,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; - std::pair<SDValue,SDValue> Result = - TLI.LowerCallTo(getRoot(), RetTy, - CS.paramHasAttr(0, Attribute::SExt), - CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), - CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), - CS.getCallingConv(), - isTailCall, - CS.doesNotReturn(), - !CS.getInstruction()->use_empty(), - Callee, Args, DAG, getCurDebugLoc()); + TargetLowering:: + CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, + getCurDebugLoc(), CS); + std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5272,7 +5316,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = Outs.size(); + + SmallVector<EVT, 4> RetTys; + SmallVector<uint64_t, 4> Offsets; + RetTy = FTy->getReturnType(); + ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + + unsigned NumValues = RetTys.size(); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); @@ -5280,8 +5330,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, - Add, + SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, false, false, 1); Values[i] = L; @@ -5292,30 +5341,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. - SmallVector<EVT, 4> RetTys; - RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys); - ISD::NodeType AssertOp = ISD::DELETED_NODE; - SmallVector<SDValue, 4> ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - - SDValue ReturnValue = - getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, - RegisterVT, VT, AssertOp); - ReturnValues.push_back(ReturnValue); - CurReg += NumRegs; - } - setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size())); + &Values[0], Values.size())); } // Assign order to nodes here. 
If the call does not produce a result, it won't @@ -5482,6 +5511,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. @@ -5512,150 +5557,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || - (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || - (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType()) { + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } - } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || - (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || - (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) return; - } - } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || - (LibInfo->has(LibFunc::sinf) && Name == "sinf") || - (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) return; - } - } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || - (LibInfo->has(LibFunc::cosf) && Name == "cosf") || - (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) return; - } - } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || - (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || - (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (visitUnaryFloatCall(I, ISD::FSQRT)) return; - } - } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || - (LibInfo->has(LibFunc::floorf) && Name == "floorf") || - (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; - } - } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || - (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || - (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; - } - } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || - (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || - (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) return; - } - } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || - (LibInfo->has(LibFunc::rintf) && Name == "rintf") || - (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) return; - } - } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || - (LibInfo->has(LibFunc::truncf) && Name == "truncf") || - (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; - } - } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || - (LibInfo->has(LibFunc::log2f) && Name == "log2f") || - (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) return; - } - } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || - (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || - (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) return; - } - } else if (Name == "memcmp") { + break; + case LibFunc::memcmp: if (visitMemCmpCall(I)) return; + break; } } } @@ -5952,11 +5944,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -6225,8 +6217,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); - assert(!OpInfo.isIndirect && - "Don't know how to handle indirect register inputs yet!"); + + // TODO: Support this. 
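
The long if/else chain over libm names collapses here into visitUnaryFloatCall plus one switch over LibFunc values. The essence is a name-to-opcode table; a toy version with a stand-in Opcode enum and an abbreviated name set:

#include <cstdio>
#include <map>
#include <string>

enum Opcode { FSIN, FCOS, FSQRT, FFLOOR, UNKNOWN };

Opcode classify(const std::string &Name) {
  static const std::map<std::string, Opcode> Table = {
      {"sin", FSIN},   {"sinf", FSIN}, {"cos", FCOS},     {"cosf", FCOS},
      {"sqrt", FSQRT}, {"floor", FFLOOR}};
  std::map<std::string, Opcode>::const_iterator It = Table.find(Name);
  return It == Table.end() ? UNKNOWN : It->second;
}

int main() {
  // One lookup replaces ten near-identical if-blocks.
  std::printf("%d %d\n", classify("sqrt"), classify("printf")); // 2 4
}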
+ if (OpInfo.isIndirect) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "Don't know how to handle indirect register inputs yet " + "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { @@ -6369,24 +6368,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> -TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, - bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, unsigned NumFixedArgs, - CallingConv::ID CallConv, bool isTailCall, - bool doesNotRet, bool isReturnValueUsed, - SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, - DebugLoc dl) const { +TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle all of the outgoing arguments. - SmallVector<ISD::OutputArg, 32> Outs; - SmallVector<SDValue, 32> OutVals; + CLI.Outs.clear(); + CLI.OutVals.clear(); + ArgListTy &Args = CLI.Args; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; - Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); + Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -6419,8 +6412,8 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - EVT PartVT = getRegisterType(RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); + EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -6429,89 +6422,88 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; - getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), - i < NumFixedArgs); + i < CLI.NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); - Outs.push_back(MyFlags); - OutVals.push_back(Parts[j]); + CLI.Outs.push_back(MyFlags); + CLI.OutVals.push_back(Parts[j]); } } } // Handle the incoming return values from the call. 
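
The LowerCallTo rewrite threading through this file is a parameter-object refactor: roughly a dozen positional arguments become one CallLoweringInfo. A toy CallInfo illustrates the design choice — call sites name what they set, and adding a field later does not break every signature; the field names below are illustrative, not LLVM's:

#include <cstdio>
#include <string>

struct CallInfo {           // toy analogue of CallLoweringInfo
  std::string Callee;
  bool IsTailCall = false;
  bool RetSExt = false;
  bool RetZExt = false;
  unsigned NumFixedArgs = 0;
};

void lowerCall(const CallInfo &CLI) {
  std::printf("call %s, tail=%d, fixed args=%u\n",
              CLI.Callee.c_str(), CLI.IsTailCall, CLI.NumFixedArgs);
}

int main() {
  CallInfo CLI;
  CLI.Callee = "memcpy";
  CLI.NumFixedArgs = 3;
  lowerCall(CLI); // a new CallInfo field won't touch this call site
}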
- SmallVector<ISD::InputArg, 32> Ins; + CLI.Ins.clear(); SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, RetTy, RetTys); + ComputeValueVTs(*this, CLI.RetTy, RetTys); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT.getSimpleVT(); - MyFlags.Used = isReturnValueUsed; - if (RetSExt) + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) MyFlags.Flags.setSExt(); - if (RetZExt) + if (CLI.RetZExt) MyFlags.Flags.setZExt(); - if (isInreg) + if (CLI.IsInReg) MyFlags.Flags.setInReg(); - Ins.push_back(MyFlags); + CLI.Ins.push_back(MyFlags); } } SmallVector<SDValue, 4> InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, - Outs, OutVals, Ins, dl, DAG, InVals); + CLI.Chain = LowerCall(CLI, InVals); // Verify that the target's LowerCall behaved as expected. - assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); - assert((!isTailCall || InVals.empty()) && + assert((!CLI.IsTailCall || InVals.empty()) && "LowerCall emitted a return value for a tail call!"); - assert((isTailCall || InVals.size() == Ins.size()) && + assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && "LowerCall didn't emit the correct number of values!"); // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions // should be processed in the current block. - if (isTailCall) { - DAG.setRoot(Chain); + if (CLI.IsTailCall) { + CLI.DAG.setRoot(CLI.Chain); return std::make_pair(SDValue(), SDValue()); } - DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); - assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (RetSExt) + if (CLI.RetSExt) AssertOp = ISD::AssertSext; - else if (RetZExt) + else if (CLI.RetZExt) AssertOp = ISD::AssertZext; SmallVector<SDValue, 4> ReturnValues; unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg], + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, AssertOp)); CurReg += NumRegs; @@ -6521,12 +6513,12 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, // such a node, so we just return a null return value in that case. In // that case, nothing will actually look at the value. 
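Worth restating the tail-call contract from the code above: LowerCallTo sets the DAG root itself and hands back a pair of null SDValues. Callers must recognize that sentinel; a hedged sketch of the caller side, assuming a CLI populated as in the previous hunk:

std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
if (!Result.second.getNode()) {
  // Sentinel: a tail call was emitted and the root is already set; no
  // further instructions should be lowered in the current block.
  return;
}
// Otherwise Result.first is the merged return value (null for void
// calls) and Result.second is the new chain.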
if (ReturnValues.empty()) - return std::make_pair(SDValue(), Chain); + return std::make_pair(SDValue(), CLI.Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&RetTys[0], RetTys.size()), + SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, + CLI.DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size()); - return std::make_pair(Res, Chain); + return std::make_pair(Res, CLI.Chain); } void TargetLowering::LowerOperationWrapper(SDNode *N, @@ -6746,7 +6738,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Note down frame index. if (FrameIndexSDNode *FI = - dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8393b41..4090002 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -180,17 +180,6 @@ private: typedef std::vector<CaseRec> CaseRecVector; - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; - struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -351,7 +340,7 @@ public: void clear(); /// clearDanglingDebugInfo - Clear the dangling debug information - /// map. This function is seperated from the clear so that debug + /// map. This function is separated from the clear so that debug /// information that is dangling in a basic block can be properly /// resolved in a different basic block. 
This allows the /// SelectionDAG to resolve dangling debug information attached @@ -531,6 +520,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index f981afb..13cd011 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "ScheduleDAGSDNodes.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Assembly/Writer.h" @@ -19,7 +20,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: @@ -265,6 +266,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKSAVE: return "stacksave"; case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; + case ISD::DEBUGTRAP: return "debugtrap"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -408,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 605509b..4e5e3ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -14,12 +14,8 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -27,7 +23,10 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include 
"llvm/CodeGen/MachineFrameInfo.h" @@ -38,6 +37,7 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -263,8 +263,6 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -void SelectionDAGISel::ISelUpdater::anchor() { } - SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), @@ -451,9 +449,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } } } - done:; } + done: // Determine if there is a call to setjmp in the machine function. MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); @@ -468,8 +466,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If To is also scheduled to be replaced, find what its ultimate // replacement is. for (;;) { - DenseMap<unsigned, unsigned>::iterator J = - FuncInfo->RegFixups.find(To); + DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); if (J == E) break; To = J->second; } @@ -703,6 +700,25 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->clear(); } +namespace { +/// ISelUpdater - helper class to handle updates of the instruction selection +/// graph. +class ISelUpdater : public SelectionDAG::DAGUpdateListener { + SelectionDAG::allnodes_iterator &ISelPosition; +public: + ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp) + : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {} + + /// NodeDeleted - Handle nodes deleted from the graph. If the node being + /// deleted is the current ISelPosition node, update ISelPosition. + /// + virtual void NodeDeleted(SDNode *N, SDNode *E) { + if (ISelPosition == SelectionDAG::allnodes_iterator(N)) + ++ISelPosition; + } +}; +} // end anonymous namespace + void SelectionDAGISel::DoInstructionSelection() { DEBUG(errs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() @@ -719,9 +735,13 @@ void SelectionDAGISel::DoInstructionSelection() { // a reference to the root node, preventing it from being deleted, // and tracking any changes of the root. HandleSDNode Dummy(CurDAG->getRoot()); - ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); + SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode()); ++ISelPosition; + // Make sure that ISelPosition gets properly updated when nodes are deleted + // in calls made from this function. + ISelUpdater ISU(*CurDAG, ISelPosition); + // The AllNodes list is now topological-sorted. Visit the // nodes by starting at the end of the list (the root of the // graph) and preceding back toward the beginning (the entry @@ -748,10 +768,8 @@ void SelectionDAGISel::DoInstructionSelection() { // If after the replacement this node is not used any more, // remove this dead node. - if (Node->use_empty()) { // Don't delete EntryToken, etc. - ISelUpdater ISU(ISelPosition); - CurDAG->RemoveDeadNode(Node, &ISU); - } + if (Node->use_empty()) // Don't delete EntryToken, etc. + CurDAG->RemoveDeadNode(Node); } CurDAG->setRoot(Dummy.getValue()); @@ -961,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. 
FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo); + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -1680,8 +1698,6 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; - ISelUpdater ISU(ISelPosition); - // Now that all the normal results are replaced, we replace the chain and // glue results if present. if (!ChainNodesMatched.empty()) { @@ -1705,7 +1721,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); - CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. if (ChainNode->use_empty() && @@ -1728,7 +1744,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && "Doesn't have a glue result"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputGlue, &ISU); + InputGlue); // If the node became dead and we haven't already seen it, delete it. if (FRN->use_empty() && @@ -1738,7 +1754,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, } if (!NowDeadNodes.empty()) - CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); + CurDAG->RemoveDeadNodes(NowDeadNodes); DEBUG(errs() << "ISEL: Match complete!\n"); } @@ -1759,7 +1775,7 @@ enum ChainResult { /// The walk we do here is guaranteed to be small because we quickly get down to /// already selected nodes "below" us. static ChainResult -WalkChainUsers(SDNode *ChainedNode, +WalkChainUsers(const SDNode *ChainedNode, SmallVectorImpl<SDNode*> &ChainedNodesInPattern, SmallVectorImpl<SDNode*> &InteriorChainedNodes) { ChainResult Result = CR_Simple; @@ -1992,14 +2008,14 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SelectionDAGISel &SDISel) { + const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SelectionDAGISel &SDISel, SDNode *N) { + const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } @@ -2062,7 +2078,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, SelectionDAGISel &SDISel) { + SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); @@ -2075,7 +2091,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, SelectionDAGISel &SDISel) { + SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); @@ -2094,7 +2110,8 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// MatcherIndex to continue with. static unsigned IsPredicateKnownToFail(const unsigned char *Table, unsigned Index, SDValue N, - bool &Result, SelectionDAGISel &SDISel, + bool &Result, + const SelectionDAGISel &SDISel, SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { switch (Table[Index++]) { default: @@ -2759,9 +2776,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, (SDNode*) 0)); } - } else { + } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), EmitNodeInfo); + } else { + // NodeToMatch was eliminated by CSE when the target changed the DAG. + // We will visit the equivalent node later. 
+ DEBUG(dbgs() << "Node was eliminated by CSE\n"); + return 0; } // If the node had chain/glue results, update our notion of the current @@ -2959,6 +2981,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); + Msg << "\nIn function: " << MF->getFunction()->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6cde05a..173ffac 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -13,13 +13,13 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e341e15..f0c50c1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -32,13 +33,6 @@ #include <cctype> using namespace llvm; -/// We are in the process of implementing a new TypeLegalization action -/// - the promotion of vector elements. This feature is disabled by default -/// and only enabled using this flag. -static cl::opt<bool> -AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), - cl::desc("Allow promotion of integer vector element types")); - /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names) { @@ -521,8 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof), - mayPromoteElements(AllowPromoteIntElem) { + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -604,6 +597,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; + predictableSelectIsExpensive = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -618,6 +612,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; + SupportJumpTables = true; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -708,42 +703,34 @@ bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { return false; } -/// hasLegalSuperRegRegClasses - Return true if the specified register class -/// has one or more super-reg register classes that are legal. -bool -TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{ - if (*RC->superregclasses_begin() == 0) - return false; - for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), - E = RC->superregclasses_end(); I != E; ++I) { - const TargetRegisterClass *RRC = *I; - if (isLegalRC(RRC)) - return true; - } - return false; -} - /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". std::pair<const TargetRegisterClass*, uint8_t> TargetLowering::findRepresentativeClass(EVT VT) const { + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; if (!RC) return std::make_pair(RC, 0); + + // Compute the set of all super-register classes. + BitVector SuperRegRC(TRI->getNumRegClasses()); + for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) + SuperRegRC.setBitsInMask(RCI.getMask()); + + // Find the first legal register class with the largest spill size. const TargetRegisterClass *BestRC = RC; - for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), - E = RC->superregclasses_end(); I != E; ++I) { - const TargetRegisterClass *RRC = *I; - if (RRC->isASubClass() || !isLegalRC(RRC)) + for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + const TargetRegisterClass *SuperRC = TRI->getRegClass(i); + // We want the largest possible spill size. + if (SuperRC->getSize() <= BestRC->getSize()) + continue; + if (!isLegalRC(SuperRC)) continue; - if (!hasLegalSuperRegRegClasses(RRC)) - return std::make_pair(RRC, 1); - BestRC = RRC; + BestRC = SuperRC; } return std::make_pair(BestRC, 1); } - /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. void TargetLowering::computeRegisterProperties() { @@ -835,11 +822,8 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); if (NElts != 1) { bool IsLegalWiderType = false; - // If we allow the promotion of vector elements using a flag, - // then return TypePromoteInteger on vector elements. // First try to promote the elements of integer vectors. If no legal // promotion was found, fallback to the widen-vector method. 
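The findRepresentativeClass rewrite above replaces a pointer walk over super-register classes with a two-phase scheme: union all candidate class IDs into a BitVector via SuperRegClassIterator masks, then scan for the largest legal class. The BitVector idiom itself, reduced to a self-contained toy:

#include "llvm/ADT/BitVector.h"
#include <cstdint>

// Toy illustration of setBitsInMask + find_first/find_next, the same
// pattern the new findRepresentativeClass applies to register-class IDs.
void visitMaskedIds() {
  llvm::BitVector Set(64);
  const uint32_t Mask[2] = { 0x00000012u, 0x00010000u }; // bits 1, 4, 48
  Set.setBitsInMask(Mask, 2);
  for (int i = Set.find_first(); i >= 0; i = Set.find_next(i)) {
    // visit ID i, here 1, 4, then 48
  }
}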
- if (mayPromoteElements) for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; // Promote vectors of integers to vectors with the same number @@ -940,9 +924,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, unsigned NumElts = VT.getVectorNumElements(); // If there is a wider vector type with the same element type as this one, - // we should widen to that legal vector type. This handles things like - // <2 x float> -> <4 x float>. - if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) { + // or a promoted vector type that has the same number of elements which + // are wider, then we should convert to that legal vector type. + // This handles things like <2 x float> -> <4 x float> and + // <4 x i1> -> <4 x i32>. + LegalizeTypeAction TA = getTypeAction(Context, VT); + if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { RegisterVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterVT)) { IntermediateVT = RegisterVT; @@ -1000,13 +987,11 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, /// TODO: Move this out of TargetLowering.cpp. void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, SmallVectorImpl<ISD::OutputArg> &Outs, - const TargetLowering &TLI, - SmallVectorImpl<uint64_t> *Offsets) { + const TargetLowering &TLI) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, ReturnType, ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; - unsigned Offset = 0; for (unsigned j = 0, f = NumValues; j != f; ++j) { EVT VT = ValueVTs[j]; @@ -1029,8 +1014,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1045,10 +1028,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); - if (Offsets) { - Offsets->push_back(Offset); - Offset += PartSize; - } } } } @@ -2019,7 +1998,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Make sure we're not loosing bits from the constant. + // Make sure we're not losing bits from the constant. if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { @@ -2343,6 +2322,55 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } + + if (C1.getMinSignedBits() <= 64 && + !isLegalICmpImmediate(C1.getSExtValue())) { + // (X & -256) == 256 -> (X >> 8) == 1 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + const APInt &AndRHSC = AndRHS->getAPIntValue(); + if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + unsigned ShiftBits = AndRHSC.countTrailingZeros(); + EVT ShiftTy = DCI.isBeforeLegalize() ? 
+ getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } + } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || + Cond == ISD::SETULE || Cond == ISD::SETUGT) { + bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + unsigned ShiftBits; + APInt NewC = C1; + ISD::CondCode NewCond = Cond; + if (AdjOne) { + ShiftBits = C1.countTrailingOnes(); + NewC = NewC + 1; + NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + } else { + ShiftBits = C1.countTrailingZeros(); + } + NewC = NewC.lshr(ShiftBits); + if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + EVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } + } + } } if (isa<ConstantFPSDNode>(N0.getNode())) { @@ -2411,25 +2439,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (N0 == N1) { + // The sext(setcc()) => setcc() optimization relies on the appropriate + // constant being emitted. + uint64_t EqVal; + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond); + break; + case ZeroOrNegativeOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; + break; + } + // We can always fold X == X for integer setcc's. if (N0.getValueType().isInteger()) { - switch (getBooleanContents(N0.getValueType().isVector())) { - case UndefinedBooleanContent: - case ZeroOrOneBooleanContent: - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); - case ZeroOrNegativeOneBooleanContent: - return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); - } + return DAG.getConstant(EqVal, VT); } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + return DAG.getConstant(EqVal, VT); if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(UOF, VT); + return DAG.getConstant(EqVal, VT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? 
ISD::SETO : ISD::SETUO; - if (NewCond != Cond) + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2998,10 +3034,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0016047..8a6b120 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -26,13 +26,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "shadowstackgc" -#include "llvm/CodeGen/GCs.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 9a86f32..980bd74 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -13,28 +13,28 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sjljehprepare" -#include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <set> using namespace llvm; diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 26cf259..c8c3fb3 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -62,7 +62,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { assert(mi2iMap.empty() && "MachineInstr -> 
Index mapping non-empty at initial numbering?"); - functionSize = 0; unsigned index = 0; MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); @@ -89,8 +88,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { // Save this base index in the maps. mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(), SlotIndex::Slot_Block))); - - ++functionSize; } // We insert one blank instructions between basic blocks. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 6f33f54..320128a 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -207,6 +207,17 @@ void SpillPlacement::activate(unsigned n) { return; ActiveNodes->set(n); nodes[n].clear(); + + // Very large bundles usually come from big switches, indirect branches, + // landing pads, or loops with many 'continue' statements. It is difficult to + // allocate registers when so many different blocks are involved. + // + // Give a small negative bias to large bundles such that 1/32 of the + // connected blocks need to be interested before we consider expanding the + // region through the bundle. This helps compile time by limiting the number + // of blocks visited and the number of links in the Hopfield network. + if (bundles->getBlocks(n).size() > 100) + nodes[n].Bias = -0.0625f; } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 9959f74..4a2b7ec 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -345,9 +345,11 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Values.clear(); // Reset the LiveRangeCalc instances needed for this spill mode. - LRCalc[0].reset(&VRM.getMachineFunction()); + LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); if (SpillMode) - LRCalc[1].reset(&VRM.getMachineFunction()); + LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. @@ -650,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { // Adjust RegAssign if a register assignment is killed at VNI->def. We // want to avoid calculating the live range of the source register if // possible. - AssignI.find(VNI->def.getPrevSlot()); + AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; // If MI doesn't kill the assigned register, just leave it. 
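Looking back at the SimplifySetCC additions a few hunks up: both shift rewrites are easy to verify exhaustively on a narrow domain. A standalone check using the 8-bit analogue of the 32-bit examples in the comments:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < (1u << 20); ++X) {
    // (X & -256) == 256  ->  (X >> 8) == 1; note -256 is ~255u, and
    // 256's trailing-zero count gives ShiftBits = 8.
    assert(((X & ~255u) == 256u) == ((X >> 8) == 1u));
    // X < 0x100  ->  (X >> 8) < 1, the SETULT range form.
    assert((X < 0x100u) == ((X >> 8) < 1u));
  }
  return 0;
}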
@@ -737,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); assert(ParentVNI && "Parent not live at complement def"); @@ -810,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; if (!Dom.first || Dom.second == VNI->def) @@ -924,11 +930,9 @@ bool SplitEditor::transferValues() { DEBUG(dbgs() << '\n'); } - LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT, - &LIS.getVNInfoAllocator()); + LRCalc[0].calculateValues(); if (SpillMode) - LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT, - &LIS.getVNInfoAllocator()); + LRCalc[1].calculateValues(); return Skipped; } @@ -953,8 +957,7 @@ void SplitEditor::extendPHIKillRanges() { if (Edit->getParent().liveAt(LastUse)) { assert(RegAssign.lookup(LastUse) == RegIdx && "Different register assignment in phi predecessor"); - LRC.extend(LI, End, - LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); + LRC.extend(LI, End); } } } @@ -1004,8 +1007,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), - &MDT, &LIS.getVNInfoAllocator()); + getLRCalc(RegIdx).extend(LI, Idx.getNextSlot()); } } @@ -1049,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); - VNI->setIsPHIDef(ParentVNI->isPHIDef()); + defValue(RegIdx, ParentVNI, ParentVNI->def); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 43a6ad8..f1eab1f 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Triple.h" using namespace llvm; // SSPBufferSize - The lower bound for a buffer to be considered for stack @@ -46,7 +47,7 @@ namespace { Function *F; Module *M; - DominatorTree* DT; + DominatorTree *DT; /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. 
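The RequiresStackProtector hunk just below tightens the array heuristic. Restated as a standalone predicate, a sketch using the same names as the hunk rather than its actual control flow:

// On non-Darwin targets only character arrays (i8 elements) may trigger
// a protector, and any array must still reach the SSPBufferSize bound.
static bool allocaTriggersProtector(const ArrayType *AT, const Triple &Trip,
                                    const TargetData *TD,
                                    unsigned SSPBufferSize) {
  if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8))
    return false;
  return TD->getTypeAllocSize(AT) >= SSPBufferSize;
}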
@@ -70,8 +71,8 @@ namespace { } StackProtector(const TargetLowering *tli) : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); @@ -95,7 +96,7 @@ bool StackProtector::runOnFunction(Function &Fn) { DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; - + return InsertStackProtectors(); } @@ -111,6 +112,8 @@ bool StackProtector::RequiresStackProtector() const { return false; const TargetData *TD = TLI->getTargetData(); + const TargetMachine &TM = TLI->getTargetMachine(); + Triple Trip(TM.getTargetTriple()); for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -123,11 +126,17 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) + if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { + // If we're on a non-Darwin platform, don't add stack protectors + // unless the array is a character array. + if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8)) + continue; + // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; + } } } @@ -159,17 +168,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuardSlot = alloca i8* // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // + // PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } BasicBlock &Entry = F->getEntryBlock(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 1e940b1..20da36e 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -46,7 +46,6 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { - bool ColorWithRegs; LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; @@ -82,7 +81,7 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { + MachineFunctionPass(ID), NextColor(-1) { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index c6fdc73..5b06195 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; assert(SrcVNI); - SrcVNI->setHasPHIKill(true); continue; } @@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex PHIIndex = 
LI->getInstructionIndex(PHI); VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); - DestVNI->setIsPHIDef(true); // Prior to PHI elimination, the live ranges of PHIs begin at their defining // instruction. After PHI elimination, PHI instructions are replaced by VNs @@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, LI->getVNInfoAllocator()); - CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, DestCopyIndex.getRegSlot(), CopyVNI)); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 8ebfbca..a813fa6 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -20,12 +20,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -57,8 +60,10 @@ namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; + OwningPtr<RegScavenger> RS; bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. @@ -124,9 +129,13 @@ INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); PreRegAlloc = MRI->isSSA(); + RS.reset(); + if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS.reset(new RegScavenger()); bool MadeChange = false; while (TailDuplicateBlocks(MF)) @@ -272,8 +281,8 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, continue; unsigned Dst = Copy->getOperand(0).getReg(); unsigned Src = Copy->getOperand(1).getReg(); - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { + if (MRI->hasOneNonDBGUse(Src) && + MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { // Copy is the only use. Do trivial copy propagation here. MRI->replaceRegWith(Dst, Src); Copy->eraseFromParent(); @@ -429,8 +438,10 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, AddSSAUpdateEntry(Reg, NewReg, PredBB); } else { DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); - if (VI != LocalVRMap.end()) + if (VI != LocalVRMap.end()) { MO.setReg(VI->second); + MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); + } } } PredBB->insert(PredBB->instr_end(), NewMI); @@ -775,6 +786,23 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); + if (RS && !TailBB->livein_empty()) { + // Update PredBB livein. 
+ RS->enterBasicBlock(PredBB); + if (!PredBB->empty()) + RS->forward(prior(PredBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), + E = TailBB->livein_end(); I != E; ++I) { + if (!RegsLiveAtExit[*I]) + // If a register is previously livein to the tail but it's not live + // at the end of predecessor BB, then it should be added to its + // livein list. + PredBB->addLiveIn(*I); + } + } + + // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 2beb928..ddee6b2 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -501,6 +501,14 @@ CreateTargetHazardRecognizer(const TargetMachine *TM, return new ScheduleHazardRecognizer(); } +// Default implementation of CreateTargetMIHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetMIHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "misched"); +} + // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfoImpl:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, @@ -509,6 +517,10 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } +//===----------------------------------------------------------------------===// +// SelectionDAG latency interface. +//===----------------------------------------------------------------------===// + int TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, @@ -537,3 +549,201 @@ int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData, return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); } +//===----------------------------------------------------------------------===// +// MachineInstr latency interface. +//===----------------------------------------------------------------------===// + +unsigned +TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + unsigned Class = MI->getDesc().getSchedClass(); + int UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps >= 0) + return UOps; + + // The # of u-ops is dynamically determined. The specific target should + // override this function to return the right number. + return 1; +} + +/// Return the default expected latency for a def based on its opcode. +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, + const MachineInstr *DefMI) const { + if (DefMI->mayLoad()) + return SchedModel->LoadLatency; + if (isHighLatencyDef(DefMI->getOpcode())) + return SchedModel->HighLatency; + return 1; +} + +unsigned TargetInstrInfoImpl:: +getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return MI->mayLoad() ?
2 : 1; + + return ItinData->getStageLatency(MI->getDesc().getSchedClass()); +} + +bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, + unsigned DefIdx) const { + if (!ItinData || ItinData->isEmpty()) + return false; + + unsigned DefClass = DefMI->getDesc().getSchedClass(); + int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + return (DefCycle != -1 && DefCycle <= 1); +} + +/// Both DefMI and UseMI must be valid. By default, call directly to the +/// itinerary. This may be overridden by the target. +int TargetInstrInfoImpl:: +getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + unsigned UseClass = UseMI->getDesc().getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +/// If we can determine the operand latency from the def only, without itinerary +/// lookup, do so. Otherwise return -1. +static int computeDefOperandLatency( + const TargetInstrInfo *TII, const InstrItineraryData *ItinData, + const MachineInstr *DefMI, bool FindMin) { + + // Let the target hook getInstrLatency handle missing itineraries. + if (!ItinData) + return TII->getInstrLatency(ItinData, DefMI); + + // Return a latency based on the itinerary properties and defining instruction + // if possible. Some common subtargets don't require per-operand latency, + // especially for minimum latencies. + if (FindMin) { + // If MinLatency is valid, call getInstrLatency. This uses Stage latency if + // it exists before defaulting to MinLatency. + if (ItinData->SchedModel->MinLatency >= 0) + return TII->getInstrLatency(ItinData, DefMI); + + // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. + // For empty itineraries, short-circuit the check and default to one cycle. + if (ItinData->isEmpty()) + return 1; + } + else if(ItinData->isEmpty()) + return TII->defaultDefLatency(ItinData->SchedModel, DefMI); + + // ...operand lookup required + return -1; +} + +/// computeOperandLatency - Compute and return the latency of the given data +/// dependent def and use when the operand indices are already known. +/// +/// FindMin may be set to get the minimum vs. expected latency. +unsigned TargetInstrInfo:: +computeOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx, + bool FindMin) const { + + int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); + + int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = getInstrLatency(ItinData, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + if (!FindMin) + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); + return InstrLatency; +} + +/// computeOperandLatency - Compute and return the latency of the given data +/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an +/// unknown use. Depending on the subtarget's itinerary properties, this may or +/// may not need to call getOperandLatency(). +/// +/// FindMin may be set to get the minimum vs. expected latency.
Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. +/// +/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency. +/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use. +unsigned TargetInstrInfo:: +computeOperandLatency(const InstrItineraryData *ItinData, + const TargetRegisterInfo *TRI, + const MachineInstr *DefMI, const MachineInstr *UseMI, + unsigned Reg, bool FindMin) const { + + int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); + + // Find the definition of the register in the defining instruction. + int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1) { + const MachineOperand &MO = DefMI->getOperand(DefIdx); + if (MO.isReg() && MO.isImplicit() && + DefIdx >= (int)DefMI->getDesc().getNumOperands()) { + // This is an implicit def, getOperandLatency() won't return the correct + // latency. e.g. + // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> + // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... + // What we want is to compute latency between def of %D6/%D7 and use of + // %Q3 instead. + unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (DefMI->getOperand(Op2).isReg()) + DefIdx = Op2; + } + // For all uses of the register, calculate the maximum latency + int OperLatency = -1; + + // If UseMI is null, then it must be a scheduling barrier. + if (!UseMI) { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } + else { + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i); + OperLatency = std::max(OperLatency, UseCycle); + } + } + // If we found an operand latency, we're done. + if (OperLatency >= 0) + return OperLatency; + } + // No operand latency was found. + unsigned InstrLatency = getInstrLatency(ItinData, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + if (!FindMin) + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); + return InstrLatency; +} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9925185..2a2fa9e 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -93,8 +93,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { // N.B.: The defaults used in here are not the same ones used in MC. // We follow gcc, MC follows gas. For example, given ".section .eh_frame", // both gas and MC will produce a section with no flags. Given - // section(".eh_frame") gcc will produce - // .section .eh_frame,"a",@progbits + // section(".eh_frame") gcc will produce: + // + // .section .eh_frame,"a",@progbits if (Name.empty() || Name[0] != '.') return K; // Some lame default implementation based on some magic section names.
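Closing out the TargetInstrInfo latency additions above: the two computeOperandLatency overloads are the intended entry points, with FindMin selecting between minimum latency (for scheduling groups) and expected latency (for instruction cost and the critical path). A typical call, sketched against the new API:

// Known operand indices: query both flavors of the new interface.
unsigned ExpLat =
    TII->computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
                               /*FindMin=*/false); // cost / critical path
unsigned MinLat =
    TII->computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
                               /*FindMin=*/true);  // scheduling groups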
@@ -349,10 +350,17 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { if (Priority == 65535) return StaticCtorSection; - std::string Name = std::string(".ctors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (UseInitArray) { + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } } const MCSection * @@ -362,10 +370,35 @@ TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { if (Priority == 65535) return StaticDtorSection; - std::string Name = std::string(".dtors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (UseInitArray) { + std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } +} + +void +TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { + UseInitArray = UseInitArray_; + if (!UseInitArray) + return; + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); } //===----------------------------------------------------------------------===// @@ -379,7 +412,7 @@ emitModuleFlags(MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, Mangler *Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; - unsigned GCFlags = 0; + unsigned ImageInfoFlags = 0; StringRef SectionVal; for (ArrayRef<Module::ModuleFlagEntry>::iterator @@ -396,8 +429,9 @@ emitModuleFlags(MCStreamer &Streamer, if (Key == "Objective-C Image Info Version") VersionVal = cast<ConstantInt>(Val)->getZExtValue(); else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only") - GCFlags |= cast<ConstantInt>(Val)->getZExtValue(); + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated") + ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue(); else if (Key == "Objective-C Image Info Section") SectionVal = cast<MDString>(Val)->getString(); } @@ -424,7 +458,7 @@ emitModuleFlags(MCStreamer &Streamer, Streamer.EmitLabel(getContext(). 
GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(VersionVal, 4); - Streamer.EmitIntValue(GCFlags, 4); + Streamer.EmitIntValue(ImageInfoFlags, 4); Streamer.AddBlankLine(); } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index c30b133..aa601af 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); -STATISTIC(NumReMats, "Number of instructions re-materialized"); -STATISTIC(NumDeletes, "Number of dead instructions deleted"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,17 +94,10 @@ namespace { unsigned Reg, MachineBasicBlock::iterator OldPos); - bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc); - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, unsigned &LastDef); - MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, - unsigned Dist); - - bool isProfitableToCommute(unsigned regB, unsigned regC, + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -117,14 +112,6 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist); - typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; - bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist); - bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI, MachineBasicBlock *MBB); @@ -150,6 +137,11 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part @@ -167,6 +159,8 @@ namespace { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); 
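For background on the rewrites in this file (an illustrative sketch, not from this commit; the register numbers are invented, in the MI notation these comments already use): a two-address instruction ties its destination to one of its sources, so the pass must turn the three-address SSA form

// %reg1026<def> = ADD %reg1024, %reg1025  (dst tied to the first source)

into

// %reg1026<def> = COPY %reg1024
// %reg1026<def> = ADD %reg1026, %reg1025  (tied constraint now satisfied)

The inserted COPY is what the commute and re-scheduling heuristics below try to make coalescable or avoid entirely.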
AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->isTerminator()) + KillMI == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, } } } + assert(KillMO && "Didn't find kill"); // Update kill and LV information. KillMO->setIsKill(false); @@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MBB->remove(MI); MBB->insert(KillPos, MI); + if (LIS) + LIS->handleMove(MI); + ++Num3AddrSunk; return true; } -/// isTwoAddrUse - Return true if the specified MI is using the specified -/// register as a two-address operand. -static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const MCInstrDesc &MCID = UseMI->getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && - (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) - // Earlier use is a two-address one. - return true; - } - return false; -} - -/// isProfitableToReMat - Return true if the heuristics determines it is likely -/// to be profitable to re-materialize the definition of Reg rather than copy -/// the register. -bool -TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, - const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc) { - bool OtherUse = false; - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = UseMO.getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end() && DI->second == Loc) - continue; // Current use. - OtherUse = true; - // There is at least one other use in the MBB that will clobber the - // register. - if (isTwoAddrUse(UseMI, Reg)) - return true; - } - } - - // If other uses in MBB are not two-address uses, then don't remat. - if (OtherUse) - return false; - - // No other uses in the same block, remat if it's defined in the same - // block so it does not unnecessarily extend the live range. - return MBB == DefMI->getParent(); -} - /// NoUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. 
It also returns the last @@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, return !(LastUse > LastDef && LastUse < Dist); } -MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, - MachineBasicBlock *MBB, - unsigned Dist) { - unsigned LastUseDist = 0; - MachineInstr *LastUse = 0; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB || MI->isDebugValue()) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); - if (DI == DistanceMap.end()) - continue; - if (DI->second >= Dist) - continue; - - if (MO.isUse() && DI->second > LastUseDist) { - LastUse = DI->first; - LastUseDist = DI->second; - } - } - return LastUse; -} - /// isCopyToReg - Return true if the specified MI is a copy instruction or /// an extract_subreg instruction. It also returns the source and destination /// registers and whether they are physical registers by reference. @@ -483,32 +407,6 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } -/// findLocalKill - Look for an instruction below MI in the MBB that kills the -/// specified register. Returns null if there is any other use of Reg between the -/// instructions. -static -MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, - MachineInstr *MI, MachineRegisterInfo *MRI, - DenseMap<MachineInstr*, unsigned> &DistanceMap) { - MachineInstr *KillMI = 0; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI == MI || UseMI->getParent() != MBB) - continue; - if (DistanceMap.count(UseMI)) - continue; - if (!UI.getOperand().isKill()) - return 0; - if (KillMI) - return 0; // -O0 kill markers cannot be trusted? - KillMI = UseMI; - } - - return KillMI; -} - /// findOnlyInterestingUse - Given a register, if it has a single in-basic-block /// use, return the use instruction if it's a copy or a two-address use. static @@ -564,10 +462,11 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToReMat - Return true if it's potentially profitable to commute +/// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool -TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, +TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, + unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { if (OptLevel == CodeGenOpt::None) @@ -604,15 +503,15 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, // %reg1026<def> = ADD %reg1024, %reg1025 // r0 = MOV %reg1026 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
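A hedged restatement of the replacement heuristic in the hunk that follows (a reading aid using the names from the visible code, not source text):

// ToRegA: the physical register that regA is expected to end up in,
// per DstRegMap; bail if unknown.
// BComp / CComp: whether regB's / regC's mapped source register is
// compatible with ToRegA (an unmapped source counts as compatible).
// Commute exactly when BComp != CComp and CComp is true, i.e. when
// swapping B and C replaces an incompatible tied source with a
// compatible one.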
- unsigned FromRegB = getMappedReg(regB, SrcRegMap); - unsigned FromRegC = getMappedReg(regC, SrcRegMap); - unsigned ToRegB = getMappedReg(regB, DstRegMap); - unsigned ToRegC = getMappedReg(regC, DstRegMap); - if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) && - ((!FromRegC && !ToRegC) || - regsAreCompatible(FromRegB, ToRegC, TRI) || - regsAreCompatible(FromRegC, ToRegB, TRI))) - return true; + unsigned ToRegA = getMappedReg(regA, DstRegMap); + if (ToRegA) { + unsigned FromRegB = getMappedReg(regB, SrcRegMap); + unsigned FromRegC = getMappedReg(regC, SrcRegMap); + bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); + bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); + if (BComp != CComp) + return !BComp && CComp; + } // If there is a use of regC between its last def (could be livein) and this // instruction, then bail. @@ -653,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); + if (Indexes) + Indexes->replaceMachineInstrInMaps(MI, NewMI); mbbi->insert(mi, NewMI); // Insert the new inst mbbi->erase(mi); // Nuke the old inst. @@ -701,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't // use RegB, convertToThreeAddress must have created more @@ -810,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, return; } -/// isSafeToDelete - If the specified instruction does not produce any side -/// effects and all of its defs are dead, then it's safe to delete. -static bool isSafeToDelete(MachineInstr *MI, - const TargetInstrInfo *TII, - SmallVector<unsigned, 4> &Kills) { - if (MI->mayStore() || MI->isCall()) - return false; - if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - Kills.push_back(MO.getReg()); - } - return true; -} - -/// canUpdateDeletedKills - Check if all the registers listed in Kills are -/// killed by instructions in MBB preceding the current instruction at -/// position Dist. If so, return true and record information about the -/// preceding kills in NewKills. -bool TwoAddressInstructionPass:: -canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist) { - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) - return false; - - MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); - if (!LastKill) - return false; - - bool isModRef = LastKill->definesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), - LastKill)); - } - return true; -} - -/// DeleteUnusedInstr - If an instruction with a tied register operand can -/// be safely deleted, just delete it.
-bool -TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned Dist) { - // Check if the instruction has no side effects and if all its defs are dead. - SmallVector<unsigned, 4> Kills; - if (!isSafeToDelete(mi, TII, Kills)) - return false; - - // If this instruction kills some virtual registers, we need to - // update the kill information. If it's not possible to do so, - // then bail out. - SmallVector<NewKill, 4> NewKills; - if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) - return false; - - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - return true; -} - /// RescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg', consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. @@ -904,14 +722,19 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { + // Bail immediately if we don't have LV available. We use it to find kills + // efficiently. + if (!LV) + return false; + MachineInstr *MI = &*mi; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); - if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -998,6 +821,12 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) // Don't want to extend other live ranges and update kills. return false; + if (MOReg == Reg && !MO.isKill()) + // We can't schedule across a use of the register in question. + return false; + // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) && + "Found multiple kills of a register in a basic block"); } } } @@ -1011,20 +840,13 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, MBB->splice(KillPos, MBB, From, To); DistanceMap.erase(DI); - if (LV) { - // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - } else { - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = KillMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - MO.setIsKill(false); - } - MI->addRegisterKilled(Reg, 0); - } + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(MI); + DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; } @@ -1045,7 +867,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return true; // Below MI unsigned DefDist = DDI->second; assert(Dist > DefDist && "Visited def already?"); - if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) return true; } return false; @@ -1060,14 +882,19 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { + // Bail immediately if we don't have LV available. We use it to find kills + // efficiently. + if (!LV) + return false; + MachineInstr *MI = &*mi; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); - if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -1093,6 +920,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, continue; if (isDefTooClose(MOReg, DI->second, MI, MBB)) return false; + if (MOReg == Reg && !MO.isKill()) + return false; Uses.insert(MOReg); if (MO.isKill() && MOReg != Reg) Kills.insert(MOReg); @@ -1134,6 +963,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (Kills.count(MOReg)) // Don't want to extend other live ranges and update kills. return false; + if (OtherMI != MI && MOReg == Reg && !MO.isKill()) + // We can't schedule across a use of the register in question. + return false; } else { OtherDefs.push_back(MOReg); } @@ -1164,19 +996,13 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. 
DistanceMap.erase(DI); - if (LV) { - // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - } else { - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = KillMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - MO.setIsKill(false); - } - MI->addRegisterKilled(Reg, 0); - } + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(KillMI); + + DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; } @@ -1201,15 +1027,10 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. bool regBKilled = isKilled(MI, regB, MRI, TII); - if (!regBKilled && MI.getOperand(DstIdx).isDead() && - DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { - ++NumDeletes; - return true; // Done with this instruction. - } + + if (TargetRegisterInfo::isVirtualRegister(regA)) + ScanUses(regA, &*mbbi, Processed); // Check if it is profitable to commute the operands. unsigned SrcOp1, SrcOp2; @@ -1230,7 +1051,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { + else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -1252,9 +1073,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return true; } - if (TargetRegisterInfo::isVirtualRegister(regA)) - ScanUses(regA, &*mbbi, Processed); - if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. @@ -1293,15 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (NewOpc != 0) { const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); if (UnfoldMCID.getNumDefs() == 1) { - MachineFunction &MF = *mbbi->getParent(); - // Unfold the load. DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); + TRI->getAllocatableClass( + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, Reg, + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -1378,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands were found, including the trivial ones. +bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->isInlineAsm() ?
+ MI->getNumOperands() : MCID.getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + SlotIndex CopyIdx; + if (Indexes) + CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. 
+ if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - MRI = &MF.getRegInfo(); +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); + Indexes = getAnalysisIfAvailable<SlotIndexes>(); LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); OptLevel = TM.getOptLevel(); @@ -1394,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << MF->getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); - // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs(MRI->getNumVirtRegs()); - - typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > - TiedOperandMap; - TiedOperandMap TiedOperands(4); + TiedOperandMap TiedOperands; SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); @@ -1426,188 +1400,63 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const MCInstrDesc &MCID = mi->getDesc(); - bool FirstTied = true; - DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = mi->isInlineAsm() - ? 
mi->getNumOperands() : MCID.getNumOperands(); - for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { - unsigned DstIdx = 0; - if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) - continue; - - if (FirstTied) { - FirstTied = false; - ++NumTwoAddressInstrs; - DEBUG(dbgs() << '\t' << *mi); - } - - assert(mi->getOperand(SrcIdx).isReg() && - mi->getOperand(SrcIdx).getReg() && - mi->getOperand(SrcIdx).isUse() && - "two address instruction invalid"); - - unsigned regB = mi->getOperand(SrcIdx).getReg(); - TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; } - // Now iterate over the information collected above. - for (TiedOperandMap::iterator OI = TiedOperands.begin(), - OE = TiedOperands.end(); OI != OE; ++OI) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; - - // If the instruction has a single pair of tied operands, try some - // transformations that may either eliminate the tied operands or - // improve the opportunities for coalescing away the register copy. - if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1) { + SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs + = TiedOperands.begin()->second; + if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; - - // If the registers are already equal, nothing needs to be done. - if (mi->getOperand(SrcIdx).getReg() == - mi->getOperand(DstIdx).getReg()) - break; // Done with this instruction. - - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, - Processed)) - break; // The tied operands have been eliminated. - } - - bool IsEarlyClobber = false; - bool RemovedKillFlag = false; - bool AllUsesCopied = true; - unsigned LastCopiedReg = 0; - unsigned regB = OI->first; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; - - const MachineOperand &DstMO = mi->getOperand(DstIdx); - unsigned regA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); - - // Grab regB from the instruction because it may have changed if the - // instruction was commuted. - regB = mi->getOperand(SrcIdx).getReg(); - - if (regA == regB) { - // The register is tied to multiple destinations (or else we would - // not have continued this far), but this use of the register - // already matches the tied destination. Leave it. - AllUsesCopied = false; + unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); + unsigned DstReg = mi->getOperand(DstIdx).getReg(); + if (SrcReg != DstReg && + TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) { + // The tied operands have been eliminated or shifted further down the + // block to ease elimination. Continue processing with 'nmi'. + TiedOperands.clear(); + mi = nmi; continue; } - LastCopiedReg = regA; - - assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); - -#ifndef NDEBUG - // First, verify that we don't have a use of "a" in the instruction - // (a = b + a for example) because our transformation will not - // work. This should never occur because we are in SSA form. 
- for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == DstIdx || - !mi->getOperand(i).isReg() || - mi->getOperand(i).getReg() != regA); -#endif - - // Emit a copy or rematerialize the definition. - const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getVRegDef(regB); - // If it's safe and profitable, remat the definition instead of - // copying it. - if (DefMI && - DefMI->isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, AA, regB) && - isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); - unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); - ++NumReMats; - } else { - BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), - regA).addReg(regB); - } - - MachineBasicBlock::iterator prevMI = prior(mi); - // Update DistanceMap. - DistanceMap.insert(std::make_pair(prevMI, Dist)); - DistanceMap[mi] = ++Dist; - - DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); - - MachineOperand &MO = mi->getOperand(SrcIdx); - assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && - "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(regA); } + } - if (AllUsesCopied) { - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - } - } - } - - // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) - LV->addVirtualRegisterKilled(regB, prior(mi)); - - } else if (RemovedKillFlag) { - // Some tied uses of regB matched their destination registers, so - // regB is still used in this instruction, but a kill flag was - // removed from a different tied use of regB, so now we need to add - // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - MO.setIsKill(true); - break; - } - } - } - - // Schedule the source copy / remat inserted to form two-address - // instruction. FIXME: Does it matter the distance map may not be - // accurate after it's scheduled? - TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); - - MadeChange = true; - + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + processTiedPairs(mi, OI->second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); - } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+ if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop @@ -1617,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } - // Some remat'ed instructions are dead. - for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(i); - if (MRI->use_nodbg_empty(VReg)) { - MachineInstr *DefMI = MRI->getVRegDef(VReg); - DefMI->eraseFromParent(); - } - } - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve // SSA form. It's now safe to de-SSA. MadeChange |= EliminateRegSequences(); @@ -1694,9 +1534,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, continue; // Check that the instructions are all in the same basic block. - MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); - MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); - if (SrcDefMI->getParent() != DstDefMI->getParent()) + MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg); + if (!SrcDefMI || !DstDefMI || + SrcDefMI->getParent() != DstDefMI->getParent()) continue; // If there are no other uses than copies which feed into @@ -1832,6 +1673,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallVector<unsigned, 4> RealSrcs; SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + // Nothing needs to be inserted for <undef> operands.
+ if (MI->getOperand(i).isUndef()) { + MI->getOperand(i).setReg(0); + continue; + } unsigned SrcReg = MI->getOperand(i).getReg(); unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); @@ -1841,7 +1687,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *DefMI = NULL; if (!MI->getOperand(i).getSubReg() && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DefMI = MRI->getVRegDef(SrcReg); + DefMI = MRI->getUniqueVRegDef(SrcReg); } if (DefMI && DefMI->isImplicitDef()) { diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 3bab93b..93840f0 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -18,12 +18,14 @@ #define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" +#include "LiveDebugVariables.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -104,11 +106,149 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -void VirtRegMap::rewrite(SlotIndexes *Indexes) { +void VirtRegMap::print(raw_ostream &OS, const Module*) const { + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { + OS << '[' << PrintReg(Reg, TRI) << " -> " + << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + << MRI->getRegClass(Reg)->getName() << "\n"; + } + } + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { + OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + << "] " << MRI->getRegClass(Reg)->getName() << "\n"; + } + } + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(dbgs()); +} + +//===----------------------------------------------------------------------===// +// VirtRegRewriter +//===----------------------------------------------------------------------===// +// +// The VirtRegRewriter is the last of the register allocator passes. +// It rewrites virtual registers to physical registers as specified in the +// VirtRegMap analysis. It also updates live-in information on basic blocks +// according to LiveIntervals. 
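As a concrete illustration of the rewriter's effect (sketch only; the virtual register number and its VirtRegMap assignment are invented for the example):

// before rewriting, with VirtRegMap assigning %vreg16 -> %EAX:
//   %vreg16<def> = MOV32ri 42
// after rewriting:
//   %EAX<def> = MOV32ri 42

If %vreg16 was live across basic blocks, %EAX is also added to the live-in list of each such block, which is what addMBBLiveIns below does.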
+// +namespace { +class VirtRegRewriter : public MachineFunctionPass { + MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + LiveIntervals *LIS; + VirtRegMap *VRM; + + void rewrite(); + void addMBBLiveIns(); +public: + static char ID; + VirtRegRewriter() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace + +char &llvm::VirtRegRewriterID = VirtRegRewriter::ID; + +INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) + +char VirtRegRewriter::ID = 0; + +void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addRequired<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + TM = &MF->getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + MRI = &MF->getRegInfo(); + Indexes = &getAnalysis<SlotIndexes>(); + LIS = &getAnalysis<LiveIntervals>(); + VRM = &getAnalysis<VirtRegMap>(); DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " << MF->getFunction()->getName() << '\n'); - DEBUG(dump()); + DEBUG(VRM->dump()); + + // Add kill flags while we still have virtual registers. + LIS->addKillFlags(); + + // Live-in lists on basic blocks are required for physregs. + addMBBLiveIns(); + + // Rewrite virtual registers. + rewrite(); + + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); + return true; +} + +// Compute MBB live-in lists from virtual register live ranges and their +// assignments. +void VirtRegRewriter::addMBBLiveIns() { + SmallVector<MachineBasicBlock*, 16> LiveIn; + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); + if (MRI->reg_nodbg_empty(VirtReg)) + continue; + LiveInterval &LI = LIS->getInterval(VirtReg); + if (LI.empty() || LIS->intervalIsInOneMBB(LI)) + continue; + // This is a virtual register that is live across basic blocks. Its + // assigned PhysReg must be marked as live-in to those blocks. + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); + + // Scan the segments of LI. 
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E; + ++I) { + if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn)) + continue; + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) + if (!LiveIn[i]->isLiveIn(PhysReg)) + LiveIn[i]->addLiveIn(PhysReg); + LiveIn.clear(); + } + } +} + +void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; @@ -135,8 +275,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); - unsigned PhysReg = getPhys(VirtReg); - assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && + "Instruction uses unmapped VirtReg"); assert(!Reserved.test(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. @@ -207,31 +348,3 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (!MRI->reg_nodbg_empty(Reg)) MRI->setPhysRegUsed(Reg); } - -void VirtRegMap::print(raw_ostream &OS, const Module* M) const { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const MachineRegisterInfo &MRI = MF->getRegInfo(); - - OS << "********** REGISTER MAP **********\n"; - for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { - OS << '[' << PrintReg(Reg, TRI) << " -> " - << PrintReg(Virt2PhysMap[Reg], TRI) << "] " - << MRI.getRegClass(Reg)->getName() << "\n"; - } - } - - for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { - OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] - << "] " << MRI.getRegClass(Reg)->getName() << "\n"; - } - } - OS << '\n'; -} - -void VirtRegMap::dump() const { - print(dbgs()); -} diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 8cac311..c320985 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -177,13 +177,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// rewrite - Rewrite all instructions in MF to use only physical registers - /// by mapping all virtual register operands to their assigned physical - /// registers. - /// - /// @param Indexes Optionally remove deleted instructions from indexes. - void rewrite(SlotIndexes *Indexes); - void print(raw_ostream &OS, const Module* M = 0) const; void dump() const; }; |
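One consequence of the removal above (hedged; the pipeline hook is inferred from the VirtRegRewriterID exported earlier in this diff, not shown here): with VirtRegMap::rewrite() gone, a register allocator no longer invokes the map directly. Instead the standalone "virtregrewriter" pass runs after allocation, roughly:

// hypothetical pipeline excerpt (sketch, not from this commit):
//   ... the register allocator fills in VirtRegMap ...
//   addPass(&VirtRegRewriterID);  // rewrite vregs to physregs, update
//                                 // MBB live-ins, emit DBG_VALUEs, then
//                                 // clear VirtRegMap and the vregs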